Loading src/TNL/Matrices/CSR.h +6 −2 Original line number Diff line number Diff line Loading @@ -41,7 +41,8 @@ private: public: typedef Real RealType; using RealType = Real; //typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; Loading @@ -51,7 +52,10 @@ public: typedef CSR< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; using ConstMatrixRow = typename BaseType::ConstMatrixRow; //using typename BaseType::ConstMatrixRow; //typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; enum SPMVCudaKernel { scalar, vector, hybrid }; Loading src/TNL/Matrices/CSR_impl.h +38 −3 Original line number Diff line number Diff line Loading @@ -131,13 +131,38 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; } // TODO: presunout do SparseRow template< typename MatrixRow > __global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) { int threadId = blockIdx.x * blockDim.x + threadIdx.x; if( threadId == 0 ) { result = row->getNonZeroElementsCount(); } } template< typename Real, typename Device, typename Index > Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { if( std::is_same< DeviceType, Devices::Host >::value ) { ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) ); return matrixRow.getNonZeroElementsCount(); } if( std::is_same< DeviceType, Devices::Cuda >::value ) { ConstMatrixRow matrixRow = this->getRow( row ); IndexType resultHost; IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost ); getNonZeroRowLengthCudaKernel<<< 1, 1 >>>( row, &resultCuda ); resultHost = Devices::Cuda::passFromDevice( resultCuda ); Devices::Cuda::freeFromDevice( resultCuda ); return resultHost; } // getRow() was throwing segmentation faults. // FOR THIS TO WORK, I had to change getRow() from [ rowIndex ] to .getElement( rowIndex ). Loading @@ -159,6 +184,16 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con // return elementCount; } template< typename Real, typename Device, typename Index > __cuda_callable__ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const { ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } template< typename Real, typename Device, typename Index > Loading src/TNL/Matrices/Sparse.h +1 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ class Sparse : public Matrix< Real, Device, Index > typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector; typedef Matrix< Real, Device, Index > BaseType; typedef SparseRow< RealType, IndexType > MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; Sparse(); Loading src/TNL/Matrices/SparseRow.h +4 −1 Original line number Diff line number Diff line Loading @@ -21,6 +21,9 @@ namespace Matrices { template< typename Real, typename Index > class SparseRow { using RealType = Real; using IndexType = Index; public: __cuda_callable__ Loading Loading @@ -53,7 +56,7 @@ class SparseRow Index getLength() const; __cuda_callable__ Index getNonZeroElementsCount( TNL::String deviceType ) const; Index getNonZeroElementsCount() const; void print( std::ostream& str ) const; Loading src/TNL/Matrices/SparseRow_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -116,7 +116,7 @@ template< typename Real, typename Index > __cuda_callable__ Index SparseRow< Real, Index >:: getNonZeroElementsCount( TNL::String deviceType ) const getNonZeroElementsCount() const { using NonConstIndex = typename std::remove_const< Index >::type; Loading Loading
src/TNL/Matrices/CSR.h +6 −2 Original line number Diff line number Diff line Loading @@ -41,7 +41,8 @@ private: public: typedef Real RealType; using RealType = Real; //typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; Loading @@ -51,7 +52,10 @@ public: typedef CSR< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; using ConstMatrixRow = typename BaseType::ConstMatrixRow; //using typename BaseType::ConstMatrixRow; //typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; enum SPMVCudaKernel { scalar, vector, hybrid }; Loading
src/TNL/Matrices/CSR_impl.h +38 −3 Original line number Diff line number Diff line Loading @@ -131,13 +131,38 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; } // TODO: presunout do SparseRow template< typename MatrixRow > __global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) { int threadId = blockIdx.x * blockDim.x + threadIdx.x; if( threadId == 0 ) { result = row->getNonZeroElementsCount(); } } template< typename Real, typename Device, typename Index > Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { if( std::is_same< DeviceType, Devices::Host >::value ) { ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) ); return matrixRow.getNonZeroElementsCount(); } if( std::is_same< DeviceType, Devices::Cuda >::value ) { ConstMatrixRow matrixRow = this->getRow( row ); IndexType resultHost; IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost ); getNonZeroRowLengthCudaKernel<<< 1, 1 >>>( row, &resultCuda ); resultHost = Devices::Cuda::passFromDevice( resultCuda ); Devices::Cuda::freeFromDevice( resultCuda ); return resultHost; } // getRow() was throwing segmentation faults. // FOR THIS TO WORK, I had to change getRow() from [ rowIndex ] to .getElement( rowIndex ). Loading @@ -159,6 +184,16 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con // return elementCount; } template< typename Real, typename Device, typename Index > __cuda_callable__ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const { ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } template< typename Real, typename Device, typename Index > Loading
src/TNL/Matrices/Sparse.h +1 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ class Sparse : public Matrix< Real, Device, Index > typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector; typedef Matrix< Real, Device, Index > BaseType; typedef SparseRow< RealType, IndexType > MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; Sparse(); Loading
src/TNL/Matrices/SparseRow.h +4 −1 Original line number Diff line number Diff line Loading @@ -21,6 +21,9 @@ namespace Matrices { template< typename Real, typename Index > class SparseRow { using RealType = Real; using IndexType = Index; public: __cuda_callable__ Loading Loading @@ -53,7 +56,7 @@ class SparseRow Index getLength() const; __cuda_callable__ Index getNonZeroElementsCount( TNL::String deviceType ) const; Index getNonZeroElementsCount() const; void print( std::ostream& str ) const; Loading
src/TNL/Matrices/SparseRow_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -116,7 +116,7 @@ template< typename Real, typename Index > __cuda_callable__ Index SparseRow< Real, Index >:: getNonZeroElementsCount( TNL::String deviceType ) const getNonZeroElementsCount() const { using NonConstIndex = typename std::remove_const< Index >::type; Loading