Loading src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h +3 −3 Original line number Diff line number Diff line Loading @@ -60,7 +60,7 @@ class CusparseCSRBase void vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( matrix, ); TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA cusparseDcsrmv( *( this->cusparseHandle ), CUSPARSE_OPERATION_NON_TRANSPOSE, Loading Loading @@ -103,7 +103,7 @@ class CusparseCSR< double > : public CusparseCSRBase< double > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( matrix, "" ); TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA double d = 1.0; double* alpha = &d; Loading Loading @@ -134,7 +134,7 @@ class CusparseCSR< float > : public CusparseCSRBase< float > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( matrix, "" ); TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA float d = 1.0; float* alpha = &d; Loading src/TNL/Matrices/AdEllpack_impl.h +2 −1 Original line number Diff line number Diff line Loading @@ -318,6 +318,8 @@ Index AdEllpack< Real, Device, Index >::getWarp( const IndexType row ) const ( ( this->rowOffset.getElement( searchedWarp ) < row ) && ( this->rowOffset.getElement( searchedWarp + 1 ) >= row ) ) ) return searchedWarp; } // FIXME: non-void function always has to return something sensible throw "bug - row was not found"; } template< typename Real, Loading Loading @@ -474,7 +476,6 @@ bool AdEllpack< Real, Device, Index >::setRow( const IndexType row, warp++; bool found = false; IndexType length = 0; IndexType elementPtr; IndexType elPtr = 0; while( ( !found ) && ( elPtr < elements ) ) Loading src/TNL/Matrices/BiEllpackSymmetric_impl.h +10 −3 Original line number Diff line number Diff line Loading @@ -149,7 +149,7 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c << " this->getName() = " << std::endl ); IndexType strip = row / this->warpSize; IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) Loading @@ -158,6 +158,12 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c return ( numberOfGroups - i ); bisection *= 2; } // FIXME: non-void function always has to return something sensible #ifndef __CUDA_ARCH__ throw "bug - row was not found"; #else TNL_ASSERT_TRUE( false, "bug - row was not found" ); #endif } template< typename Real, Loading Loading @@ -390,6 +396,7 @@ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setRow( const IndexTy } if( thisElementPtr == numberOfElements ) return true; return false; } template< typename Real, Loading Loading @@ -576,8 +583,8 @@ __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, Loading src/TNL/Matrices/BiEllpack_impl.h +10 −3 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde << " this->getName() = " << std::endl; ); IndexType strip = row / this->warpSize; IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) Loading @@ -163,6 +163,12 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde return ( numberOfGroups - i ); bisection *= 2; } // FIXME: non-void function always has to return something sensible #ifndef __CUDA_ARCH__ throw "bug - row was not found"; #else TNL_ASSERT_TRUE( false, "bug - row was not found" ); #endif } template< typename Real, Loading Loading @@ -397,6 +403,7 @@ setRow( const IndexType row, } if( thisElementPtr == numberOfElements ) return true; return false; } template< typename Real, Loading Loading @@ -581,8 +588,8 @@ __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, Loading src/TNL/Matrices/EllpackSymmetricGraph.h +4 −7 Original line number Diff line number Diff line Loading @@ -153,10 +153,8 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > const int color ) const; #endif __cuda_callable__ void computePermutationArray(); __cuda_callable__ bool rearrangeMatrix( bool verbose ); bool save( File& file ) const; Loading @@ -182,21 +180,20 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > __cuda_callable__ Index getRowsOfColor( IndexType color ) const; __cuda_callable__ void copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix ); __cuda_callable__ Containers::Vector< Index, Device, Index > getPermutationArray(); Containers::Vector< Index, Device, Index >& getPermutationArray(); __cuda_callable__ Containers::Vector< Index, Device, Index > getInversePermutation(); Containers::Vector< Index, Device, Index >& getInversePermutation(); __cuda_callable__ Containers::Vector< Index, Device, Index > getColorPointers(); Containers::Vector< Index, Device, Index >& getColorPointers(); protected: bool allocateElements(); void allocateElements(); IndexType rowLengths, alignedRows; Loading Loading
src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h +3 −3 Original line number Diff line number Diff line Loading @@ -60,7 +60,7 @@ class CusparseCSRBase void vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( matrix, ); TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA cusparseDcsrmv( *( this->cusparseHandle ), CUSPARSE_OPERATION_NON_TRANSPOSE, Loading Loading @@ -103,7 +103,7 @@ class CusparseCSR< double > : public CusparseCSRBase< double > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( matrix, "" ); TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA double d = 1.0; double* alpha = &d; Loading Loading @@ -134,7 +134,7 @@ class CusparseCSR< float > : public CusparseCSRBase< float > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( matrix, "" ); TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA float d = 1.0; float* alpha = &d; Loading
src/TNL/Matrices/AdEllpack_impl.h +2 −1 Original line number Diff line number Diff line Loading @@ -318,6 +318,8 @@ Index AdEllpack< Real, Device, Index >::getWarp( const IndexType row ) const ( ( this->rowOffset.getElement( searchedWarp ) < row ) && ( this->rowOffset.getElement( searchedWarp + 1 ) >= row ) ) ) return searchedWarp; } // FIXME: non-void function always has to return something sensible throw "bug - row was not found"; } template< typename Real, Loading Loading @@ -474,7 +476,6 @@ bool AdEllpack< Real, Device, Index >::setRow( const IndexType row, warp++; bool found = false; IndexType length = 0; IndexType elementPtr; IndexType elPtr = 0; while( ( !found ) && ( elPtr < elements ) ) Loading
src/TNL/Matrices/BiEllpackSymmetric_impl.h +10 −3 Original line number Diff line number Diff line Loading @@ -149,7 +149,7 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c << " this->getName() = " << std::endl ); IndexType strip = row / this->warpSize; IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) Loading @@ -158,6 +158,12 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c return ( numberOfGroups - i ); bisection *= 2; } // FIXME: non-void function always has to return something sensible #ifndef __CUDA_ARCH__ throw "bug - row was not found"; #else TNL_ASSERT_TRUE( false, "bug - row was not found" ); #endif } template< typename Real, Loading Loading @@ -390,6 +396,7 @@ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setRow( const IndexTy } if( thisElementPtr == numberOfElements ) return true; return false; } template< typename Real, Loading Loading @@ -576,8 +583,8 @@ __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, Loading
src/TNL/Matrices/BiEllpack_impl.h +10 −3 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde << " this->getName() = " << std::endl; ); IndexType strip = row / this->warpSize; IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) Loading @@ -163,6 +163,12 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde return ( numberOfGroups - i ); bisection *= 2; } // FIXME: non-void function always has to return something sensible #ifndef __CUDA_ARCH__ throw "bug - row was not found"; #else TNL_ASSERT_TRUE( false, "bug - row was not found" ); #endif } template< typename Real, Loading Loading @@ -397,6 +403,7 @@ setRow( const IndexType row, } if( thisElementPtr == numberOfElements ) return true; return false; } template< typename Real, Loading Loading @@ -581,8 +588,8 @@ __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, Loading
src/TNL/Matrices/EllpackSymmetricGraph.h +4 −7 Original line number Diff line number Diff line Loading @@ -153,10 +153,8 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > const int color ) const; #endif __cuda_callable__ void computePermutationArray(); __cuda_callable__ bool rearrangeMatrix( bool verbose ); bool save( File& file ) const; Loading @@ -182,21 +180,20 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > __cuda_callable__ Index getRowsOfColor( IndexType color ) const; __cuda_callable__ void copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix ); __cuda_callable__ Containers::Vector< Index, Device, Index > getPermutationArray(); Containers::Vector< Index, Device, Index >& getPermutationArray(); __cuda_callable__ Containers::Vector< Index, Device, Index > getInversePermutation(); Containers::Vector< Index, Device, Index >& getInversePermutation(); __cuda_callable__ Containers::Vector< Index, Device, Index > getColorPointers(); Containers::Vector< Index, Device, Index >& getColorPointers(); protected: bool allocateElements(); void allocateElements(); IndexType rowLengths, alignedRows; Loading