Commit 05234ee7 authored by Lukáš Matthew Čejka's avatar Lukáš Matthew Čejka
Browse files

Attempted to fix non-working CUDA code for getting non-zero elements of

a row. Committing for backup purposes.
parent 8a3d1a9a
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -13,6 +13,9 @@
#include <TNL/Matrices/Sparse.h>
#include <TNL/Containers/Vector.h>

#include <TNL/Devices/Cuda.h>
#include <TNL/Exceptions/CudaBadAlloc.h>

namespace TNL {
namespace Matrices {
   
@@ -80,8 +83,14 @@ public:
   // Returns the length of the given row; the definition computes it as
   // rowPointers[ row + 1 ] - rowPointers[ row ].
   // Marked __cuda_callable__ (presumably usable from both host and device
   // code - confirm against the macro's definition).
   __cuda_callable__
   IndexType getRowLengthFast( const IndexType row ) const;
   
#ifdef HAVE_CUDA
   // NOTE(review): the two declarations below are compiled only when HAVE_CUDA
   // is defined, so code that calls them will not build without CUDA -
   // confirm that this restriction is intended.
   // Disabled draft of a device-side helper, kept for reference only.
   //__device__
   //void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result );
   
   // Presumably returns the number of non-zero elements stored in the given
   // row (the definition is not visible here - verify).
   IndexType getNonZeroRowLength( const IndexType row ) const;
   
   // Fetches the row through getRow( row ) and returns the value of its
   // getNonZeroElementsCount().
   // NOTE(review): unlike getRowLengthFast, this is not marked
   // __cuda_callable__ - confirm whether it is meant to be callable from
   // device code.
   IndexType getNonZeroRowLengthFast( const IndexType row ) const;
#endif
   // Accepts any CSR instantiation; Real2, Device2 and Index2 may differ from
   // this matrix's own template arguments.
   // Presumably makes this matrix adopt the layout of `matrix` - the
   // definition is not visible here, verify before relying on it.
   template< typename Real2, typename Device2, typename Index2 >
   void setLike( const CSR< Real2, Device2, Index2 >& matrix );

+5 −2
Original line number Diff line number Diff line
@@ -131,9 +131,11 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
   return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
}

// TODO: presunout do SparseRow
#ifdef HAVE_CUDA
// TODO: move to SparseRow
template< typename MatrixRow >
__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result )
__global__
void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result )
{
   int threadId = blockIdx.x * blockDim.x + threadIdx.x;
   if( threadId == 0 )
@@ -193,6 +195,7 @@ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row )
   ConstMatrixRow matrixRow = this->getRow( row );
   return matrixRow.getNonZeroElementsCount();
}
#endif

template< typename Real,
          typename Device,
+3 −0
Original line number Diff line number Diff line
@@ -55,6 +55,9 @@ class SparseRow
      // Returns the row length; the definition simply returns `length`.
      __cuda_callable__
      Index getLength() const;
      
//      __global__ 
//      void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result );
      
      // Presumably returns the number of non-zero entries held by this row
      // (the definition is not visible here - verify).
      // CSR::getNonZeroRowLengthFast returns this value directly.
      __cuda_callable__
      Index getNonZeroElementsCount() const;

+10 −0
Original line number Diff line number Diff line
@@ -112,6 +112,16 @@ getLength() const
   return length;
}

//template< typename MatrixRow >
//__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result )
//{
//   int threadId = blockIdx.x * blockDim.x + threadIdx.x;
//   if( threadId == 0 )
//   {
//      *result = row.getNonZeroElementsCount();
//   }
//}

template< typename Real, typename Index >
__cuda_callable__
Index