diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h index 423b40feff03c60869486c76bd330dd129b13df3..348f01592cf13a7061bc23b70d62e0a9405bfb07 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/CSR.h @@ -13,6 +13,9 @@ #include <TNL/Matrices/Sparse.h> #include <TNL/Containers/Vector.h> +#include <TNL/Devices/Cuda.h> +#include <TNL/Exceptions/CudaBadAlloc.h> + namespace TNL { namespace Matrices { @@ -80,8 +83,14 @@ public: __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; +#ifdef HAVE_CUDA + //__device__ + //void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ); + IndexType getNonZeroRowLength( const IndexType row ) const; - + + IndexType getNonZeroRowLengthFast( const IndexType row ) const; +#endif template< typename Real2, typename Device2, typename Index2 > void setLike( const CSR< Real2, Device2, Index2 >& matrix ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index a77e68575b13ce5933dd7fbfd10a376402af48e5..e8324de778f67ca6ceb2786c7643d1a8cf6d4687 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -131,9 +131,11 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; } -// TODO: presunout do SparseRow +#ifdef HAVE_CUDA +// TODO: move to SparseRow template< typename MatrixRow > -__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) +__global__ +void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) { int threadId = blockIdx.x * blockDim.x + threadIdx.x; if( threadId == 0 ) @@ -193,6 +195,7 @@ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } +#endif template< typename Real, typename Device, diff --git 
a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/SparseRow.h index fac855eae71a26cdb4dbf62f927f12d0f24b5af1..6407d4a526303c70124dd03be48d72e9eab0e565 100644 --- a/src/TNL/Matrices/SparseRow.h +++ b/src/TNL/Matrices/SparseRow.h @@ -55,6 +55,9 @@ class SparseRow __cuda_callable__ Index getLength() const; +// __global__ +// void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ); + __cuda_callable__ Index getNonZeroElementsCount() const; diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/SparseRow_impl.h index d83aad239271c44baffb1bc5181aa65224725bf4..31d133c618a94a9804c79ff45849e46e9c95c77c 100644 --- a/src/TNL/Matrices/SparseRow_impl.h +++ b/src/TNL/Matrices/SparseRow_impl.h @@ -112,6 +112,16 @@ getLength() const return length; } +//template< typename MatrixRow > +//__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) +//{ +// int threadId = blockIdx.x * blockDim.x + threadIdx.x; +// if( threadId == 0 ) +// { +// *result = row.getNonZeroElementsCount(); +// } +//} + template< typename Real, typename Index > __cuda_callable__ Index