diff --git a/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h b/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h index cfb271d7d7c4090cb6fcfcb67766d0115757d1d9..8f6d376fe27ebed3cd67307bf8f24ea2c5d630d4 100644 --- a/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h +++ b/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h @@ -60,7 +60,7 @@ class CusparseCSRBase void vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( matrix, ); + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA cusparseDcsrmv( *( this->cusparseHandle ), CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -103,7 +103,7 @@ class CusparseCSR< double > : public CusparseCSRBase< double > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( matrix, "" ); + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA double d = 1.0; double* alpha = &d; @@ -134,7 +134,7 @@ class CusparseCSR< float > : public CusparseCSRBase< float > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( matrix, "" ); + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA float d = 1.0; float* alpha = &d; diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h index b70087b8d956de7d53603e2d8b72d4db064cb46b..daab3b8cdc351459120436186cb3060aeee54d8f 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/AdEllpack_impl.h @@ -318,6 +318,8 @@ Index AdEllpack< Real, Device, Index >::getWarp( const IndexType row ) const ( ( this->rowOffset.getElement( searchedWarp ) < row ) && ( this->rowOffset.getElement( searchedWarp + 1 ) >= row ) ) ) return searchedWarp; } + // FIXME: non-void function always has to return something sensible + throw "bug - row was not found"; } template< typename Real, @@ -474,7 +476,6 @@ bool AdEllpack< Real, Device, Index >::setRow( const IndexType row, warp++; bool found = false; - IndexType length = 0; IndexType elementPtr; IndexType elPtr = 0; while( ( !found ) && ( elPtr < elements ) ) diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/BiEllpackSymmetric_impl.h index 4a8ad24701334e2a0609190874e1871096c784cd..5b6f94b57b571963f5ba18c3c4c7e8fb7700fa99 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h @@ -149,7 +149,7 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c << " this->getName() = " << std::endl ); IndexType strip = row / this->warpSize; - IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; + IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) @@ -158,6 +158,12 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c return ( numberOfGroups - i ); bisection *= 2; } + // FIXME: non-void function always has to return something sensible +#ifndef __CUDA_ARCH__ + throw "bug - row was not found"; +#else + TNL_ASSERT_TRUE( false, "bug - row was not found" ); +#endif } template< typename Real, @@ -390,6 +396,7 @@ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setRow( const IndexTy } if( thisElementPtr == numberOfElements ) return true; + return false; } template< typename Real, @@ -576,8 +583,8 @@ __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { - return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); + return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] + - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index b8533d62077285549564585569949edd662371d7..ea5e1efb9463915ec724b28b38abbfe64ac596b0 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -154,7 +154,7 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde << " this->getName() = " << std::endl; ); IndexType strip = row / this->warpSize; - IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; + IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) @@ -163,6 +163,12 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde return ( numberOfGroups - i ); bisection *= 2; } + // FIXME: non-void function always has to return something sensible +#ifndef __CUDA_ARCH__ + throw "bug - row was not found"; +#else + TNL_ASSERT_TRUE( false, "bug - row was not found" ); +#endif } template< typename Real, @@ -397,6 +403,7 @@ setRow( const IndexType row, } if( thisElementPtr == numberOfElements ) return true; + return false; } template< typename Real, @@ -581,8 +588,8 @@ __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { - return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); + return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] + - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/EllpackSymmetricGraph.h index 26a5e18368182f9fda86cba2e00fe0341c2a6e07..3a282c796be439209184023fb76aa692ff1e4294 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph.h @@ -153,10 +153,8 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > const int color ) const; #endif - __cuda_callable__ void computePermutationArray(); - __cuda_callable__ bool rearrangeMatrix( bool verbose ); bool save( File& file ) const; @@ -182,21 +180,20 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > __cuda_callable__ Index getRowsOfColor( IndexType color ) const; - __cuda_callable__ void copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix ); __cuda_callable__ - Containers::Vector< Index, Device, Index > getPermutationArray(); + Containers::Vector< Index, Device, Index >& getPermutationArray(); __cuda_callable__ - Containers::Vector< Index, Device, Index > getInversePermutation(); + Containers::Vector< Index, Device, Index >& getInversePermutation(); __cuda_callable__ - Containers::Vector< Index, Device, Index > getColorPointers(); + Containers::Vector< Index, Device, Index >& getColorPointers(); protected: - bool allocateElements(); + void allocateElements(); IndexType rowLengths, alignedRows; diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index 9d04b6b2255fd9c9293c9d81358ac2b214824b33..799d07281e62ae441c39162ee2fc5271997effac 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -102,7 +102,7 @@ template< typename Real, __cuda_callable__ Index EllpackSymmetricGraph< Real, Device, Index >::getRowsOfColor( IndexType color ) const { - return this->colorPointers.getElement( color + 1 ) - this->colorPointers.getElement( color ); + return this->colorPointers[ color + 1 ] - this->colorPointers[ color ]; } /* @@ -174,7 +174,6 @@ void EllpackSymmetricGraph< Real, Device, Index >::computeColorsVector( Containe template< typename Real, typename Device, typename Index > -__cuda_callable__ void EllpackSymmetricGraph< Real, Device, Index >::computePermutationArray() { // init vector of colors and permutation array @@ -238,7 +237,6 @@ void EllpackSymmetricGraph< Real, Device, Index >::verifyPermutationArray() template< typename Real, typename Device, typename Index > -__cuda_callable__ bool EllpackSymmetricGraph< Real, Device, Index >::rearrangeMatrix( bool verbose ) { // first we need to know permutation @@ -296,7 +294,8 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getPermutationArray() +Containers::Vector< Index, Device, Index >& +EllpackSymmetricGraph< Real, Device, Index >::getPermutationArray() { return this->permutationArray; } @@ -305,7 +304,8 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getInversePermutation() +Containers::Vector< Index, Device, Index >& +EllpackSymmetricGraph< Real, Device, Index >::getInversePermutation() { return this->inversePermutationArray; } @@ -314,7 +314,8 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getColorPointers() +Containers::Vector< Index, Device, Index >& +EllpackSymmetricGraph< Real, Device, Index >::getColorPointers() { return this->colorPointers; } @@ -322,7 +323,6 @@ Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, template< typename Real, typename Device, typename Index > -__cuda_callable__ void EllpackSymmetricGraph< Real, Device, Index >::copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix ) { // TODO: fix @@ -331,17 +331,17 @@ void EllpackSymmetricGraph< Real, Device, Index >::copyFromHostToCuda( EllpackSy this->rearranged = true; this->rowLengths = matrix.getRowLengthsInt(); this->alignedRows = matrix.getAlignedRows(); - Containers::Vector< Index, Devices::Host, Index > colorPointers = matrix.getColorPointers(); + Containers::Vector< Index, Devices::Host, Index >& colorPointers = matrix.getColorPointers(); this->colorPointers.setSize( colorPointers.getSize() ); for( IndexType i = 0; i < colorPointers.getSize(); i++ ) this->colorPointers.setElement( i, colorPointers[ i ] ); - Containers::Vector< Index,Devices::Host, Index > permutationArray = matrix.getPermutationArray(); + Containers::Vector< Index,Devices::Host, Index >& permutationArray = matrix.getPermutationArray(); this->permutationArray.setSize( permutationArray.getSize() ); for( IndexType i = 0; i < permutationArray.getSize(); i++ ) this->permutationArray.setElement( i, permutationArray[ i ] ); - Containers::Vector< Index, Devices::Host, Index > inversePermutation = matrix.getInversePermutation(); + Containers::Vector< Index, Devices::Host, Index >& inversePermutation = matrix.getInversePermutation(); this->inversePermutationArray.setSize( inversePermutation.getSize() ); for( IndexType i = 0; i < inversePermutation.getSize(); i++ ) this->inversePermutationArray.setElement( i, inversePermutation[ i ] ); @@ -363,7 +363,7 @@ bool EllpackSymmetricGraph< Real, Device, Index >::setConstantRowLengths( const TNL_ASSERT( rowLengths > 0, std::cerr << " rowLengths = " << rowLengths ); this->rowLengths = rowLengths; if( this->rows > 0 ) - return allocateElements(); + allocateElements(); return true; } @@ -445,37 +445,37 @@ bool EllpackSymmetricGraph< Real, Device, Index > :: addElementFast( const Index const RealType& thisElementMultiplicator ) { typedef EllpackSymmetricGraphDeviceDependentCode< DeviceType > DDCType; - IndexType i = DDCType::getRowBegin( *this, this->permutationArray.getElement( row ) ); - const IndexType rowEnd = DDCType::getRowEnd( *this, this->permutationArray.getElement( row ) ); + IndexType i = DDCType::getRowBegin( *this, this->permutationArray[ row ] ); + const IndexType rowEnd = DDCType::getRowEnd( *this, this->permutationArray[ row ] ); const IndexType step = DDCType::getElementStep( *this ); while( i < rowEnd && - this->columnIndexes.getElement( i ) < column && - this->columnIndexes.getElement( i ) != this->getPaddingIndex() ) i += step; + this->columnIndexes[ i ] < column && + this->columnIndexes[ i ] != this->getPaddingIndex() ) i += step; if( i == rowEnd ) return false; - if( this->columnIndexes.getElement( i ) == column ) + if( this->columnIndexes[ i ] == column ) { - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value); + this->values[ i ] = thisElementMultiplicator * this->values[ i ] + value; return true; } else - if( this->columnIndexes.getElement( i ) == this->getPaddingIndex() ) // artificial zero + if( this->columnIndexes[ i ] == this->getPaddingIndex() ) // artificial zero { - this->columnIndexes.setElement( i, column); - this->values.setElement( i, value); + this->columnIndexes[ i ] = column; + this->values[ i ] = value; } else { Index j = rowEnd - step; while( j > i ) { - this->columnIndexes.setElement( j, this->columnIndexes.getElement( j - step ) ); - this->values.setElement( j, this->values.getElement( j - step ) ); + this->columnIndexes[ j ] = this->columnIndexes[ j - step ]; + this->values[ j ] = this->values[ j - step ]; j -= step; } - this->columnIndexes.setElement( i, column ); - this->values.setElement( i, value ); + this->columnIndexes[ i ] = column; + this->values[ i ] = value; } return true; } @@ -783,6 +783,7 @@ bool EllpackSymmetricGraph< Real, Device, Index >::help( bool verbose ) { if( !this->rearranged ) return this->rearrangeMatrix( verbose ); + return true; } template< typename Real, @@ -810,7 +811,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::print( std::ostream& str ) co template< typename Real, typename Device, typename Index > -bool EllpackSymmetricGraph< Real, Device, Index >::allocateElements() +void EllpackSymmetricGraph< Real, Device, Index >::allocateElements() { Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths ); } diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index af3a645200027d08c429986849ee594fb0de2f33..42202a883c0c887317aaf68904dfa0bddf27a646 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -85,7 +85,7 @@ bool EllpackSymmetric< Real, Device, Index >::setConstantRowLengths( const Index std::cerr << " rowLengths = " << rowLengths ); this->rowLengths = rowLengths; if( this->rows > 0 ) - return allocateElements(); + allocateElements(); return true; } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h index 4cc04436b125004544032013639e1b919f6bec9d..a2ab000957227a88a9655888cb417cea6498f56f 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h @@ -195,10 +195,8 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > void copyFromHostToCuda( SlicedEllpackSymmetricGraph< Real, Devices::Host, Index, SliceSize >& matrix ); - __cuda_callable__ bool rearrangeMatrix( bool verbose = false ); - __cuda_callable__ void computePermutationArray(); Containers::Vector< Index, Device, Index > getSlicePointers(); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h index d5ad42534869944b4bd11dadca31c1271a839241..9f09a21c506bd08110bc8360e17d3bf1f3a4097c 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h @@ -701,7 +701,6 @@ Index SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getRealRowL Index rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); Index rowEnd = rowBegin + rowLength; - Index step = 1; Index length = 0; for( Index i = rowBegin; i < rowEnd; i++ ) if( this->columnIndexes.getElement( i ) != this->getPaddingIndex() ) @@ -764,7 +763,6 @@ bool SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::rearrangeMat for( IndexType row = slice * SliceSize; row < (slice + 1) * SliceSize && row < this->getRows(); row++ ) { IndexType rowBegin = slicePointerOrig + rowLengthOrig * ( row - slice * SliceSize ); - IndexType rowEnd = rowBegin + rowLengthOrig; IndexType elementPointer = rowBegin; IndexType sliceNew = this->permutationArray.getElement( row ) / SliceSize; @@ -1040,7 +1038,7 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Host > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, typename SlicedEllpackSymmetricGraph< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths, Containers::Vector< Index, Device, Index >& sliceRowLengths, Containers::Vector< Index, Device, Index >& slicePointers ) @@ -1233,7 +1231,7 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, typename SlicedEllpackSymmetricGraph< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths, Containers::Vector< Index, Device, Index >& sliceRowLengths, Containers::Vector< Index, Device, Index >& slicePointers ) diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 905c42080c814b21db156b79341478787cf289df..402ac5a6c8128ab67bbf2393528fb3a2b58b9077 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -75,7 +75,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL // TODO: Uncomment the next line and fix the compilation //DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths ); - TNL_ASSERT( false, "code fix required" ); + throw std::runtime_error("code fix required"); this->maxRowLength = rowLengths.max(); @@ -743,6 +743,7 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Host > template< typename Real, typename Index, int SliceSize > + __cuda_callable__ static void initRowTraverseFast( const SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, @@ -762,7 +763,7 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Host > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, typename SlicedEllpackSymmetric< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { Index row( 0 ), slice( 0 ), sliceRowLength( 0 ); @@ -862,7 +863,7 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, typename SlicedEllpackSymmetric< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { #ifdef HAVE_CUDA