Loading src/TNL/Matrices/CSR.h +10 −1 Original line number Diff line number Diff line Loading @@ -13,6 +13,9 @@ #include <TNL/Matrices/Sparse.h> #include <TNL/Containers/Vector.h> #include <TNL/Devices/Cuda.h> #include <TNL/Exceptions/CudaBadAlloc.h> namespace TNL { namespace Matrices { Loading Loading @@ -80,8 +83,14 @@ public: __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; #ifdef HAVE_CUDA //__device__ //void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ); IndexType getNonZeroRowLength( const IndexType row ) const; IndexType getNonZeroRowLengthFast( const IndexType row ) const; #endif template< typename Real2, typename Device2, typename Index2 > void setLike( const CSR< Real2, Device2, Index2 >& matrix ); Loading src/TNL/Matrices/CSR_impl.h +5 −2 Original line number Diff line number Diff line Loading @@ -131,9 +131,11 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; } // TODO: presunout do SparseRow #ifdef HAVE_CUDA // TODO: move to SparseRow template< typename MatrixRow > __global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) __global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) { int threadId = blockIdx.x * blockDim.x + threadIdx.x; if( threadId == 0 ) Loading Loading @@ -193,6 +195,7 @@ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } #endif template< typename Real, typename Device, Loading src/TNL/Matrices/SparseRow.h +3 −0 Original line number Diff line number Diff line Loading @@ -55,6 +55,9 @@ class SparseRow __cuda_callable__ Index getLength() const; // __global__ // void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ); __cuda_callable__ Index getNonZeroElementsCount() const; Loading src/TNL/Matrices/SparseRow_impl.h +10 −0 Original line number Diff line number Diff line Loading @@ -112,6 +112,16 @@ getLength() const return length; } //template< typename MatrixRow > //__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) //{ // int threadId = blockIdx.x * blockDim.x + threadIdx.x; // if( threadId == 0 ) // { // result = row->getNonZeroElementsCount(); // } //} template< typename Real, typename Index > __cuda_callable__ Index Loading Loading
src/TNL/Matrices/CSR.h +10 −1 Original line number Diff line number Diff line Loading @@ -13,6 +13,9 @@ #include <TNL/Matrices/Sparse.h> #include <TNL/Containers/Vector.h> #include <TNL/Devices/Cuda.h> #include <TNL/Exceptions/CudaBadAlloc.h> namespace TNL { namespace Matrices { Loading Loading @@ -80,8 +83,14 @@ public: __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; #ifdef HAVE_CUDA //__device__ //void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ); IndexType getNonZeroRowLength( const IndexType row ) const; IndexType getNonZeroRowLengthFast( const IndexType row ) const; #endif template< typename Real2, typename Device2, typename Index2 > void setLike( const CSR< Real2, Device2, Index2 >& matrix ); Loading
src/TNL/Matrices/CSR_impl.h +5 −2 Original line number Diff line number Diff line Loading @@ -131,9 +131,11 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; } // TODO: presunout do SparseRow #ifdef HAVE_CUDA // TODO: move to SparseRow template< typename MatrixRow > __global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) __global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) { int threadId = blockIdx.x * blockDim.x + threadIdx.x; if( threadId == 0 ) Loading Loading @@ -193,6 +195,7 @@ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } #endif template< typename Real, typename Device, Loading
src/TNL/Matrices/SparseRow.h +3 −0 Original line number Diff line number Diff line Loading @@ -55,6 +55,9 @@ class SparseRow __cuda_callable__ Index getLength() const; // __global__ // void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ); __cuda_callable__ Index getNonZeroElementsCount() const; Loading
src/TNL/Matrices/SparseRow_impl.h +10 −0 Original line number Diff line number Diff line Loading @@ -112,6 +112,16 @@ getLength() const return length; } //template< typename MatrixRow > //__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) //{ // int threadId = blockIdx.x * blockDim.x + threadIdx.x; // if( threadId == 0 ) // { // result = row->getNonZeroElementsCount(); // } //} template< typename Real, typename Index > __cuda_callable__ Index Loading