Loading src/TNL/Matrices/CSR_impl.h +35 −8 Original line number Diff line number Diff line Loading @@ -136,8 +136,28 @@ template< typename Real, typename Index > Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); ConstMatrixRow matrixRow = this->getRow( row ); IndexType count = matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) ); // return count; // getRow() was throwing segmentation faults. // FOR THIS TO WORK, I had to change getRow() from [ rowIndex ] to .getElement( rowIndex ). // THE FOLLOWING throws: /home/lukas/tnl-dev/src/TNL/ParallelFor.h(92): error: identifier "" is undefined in device code // static IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); // // elementCount = 0; // Make sure it is reset. Without this seemingly useless step, it returned incorrect values. // // auto computeNonZeros = [matrixRow] __cuda_callable__ ( IndexType i ) mutable // { // if( matrixRow.getElementValue( i ) != 0.0 ) // elementCount++; // }; // // ParallelFor< DeviceType >::exec( (IndexType) 0, matrixRow.getLength(), computeNonZeros ); // // return elementCount; } template< typename Real, Loading Loading @@ -439,12 +459,19 @@ typename CSR< Real, Device, Index >::ConstMatrixRow CSR< Real, Device, Index >:: getRow( const IndexType rowIndex ) const { const IndexType rowOffset = this->rowPointers[ rowIndex ]; const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset; const IndexType rowOffset = this->rowPointers.getElement( rowIndex ); const IndexType rowLength = this->rowPointers.getElement( rowIndex + 1 ) - rowOffset; return ConstMatrixRow( &this->columnIndexes[ rowOffset ], &this->values[ rowOffset ], rowLength, 1 ); // const IndexType rowOffset = this->rowPointers[ rowIndex ]; // const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset; // return ConstMatrixRow( &this->columnIndexes[ rowOffset ], // &this->values[ rowOffset ], // rowLength, // 1 ); } template< typename Real, Loading src/TNL/Matrices/ChunkedEllpack_impl.h +18 −3 Original line number Diff line number Diff line Loading @@ -198,8 +198,8 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( ConstCompressedRowLengthsV // will be zero and make the assertion fail ( https://stackoverflow.com/questions/33273359/in-c-using-the-ceil-a-division-is-not-working ). // To fix this, typecast them to ( float ), instead of ( RealType ) maxChunkInSlice = max( maxChunkInSlice, ceil( ( float ) rowLengths[ i ] / ( float ) this->rowToChunkMapping[ i ] ) ); ceil( ( double ) rowLengths[ i ] / ( double ) this->rowToChunkMapping[ i ] ) ); } TNL_ASSERT( maxChunkInSlice > 0, std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl ); Loading Loading @@ -314,7 +314,22 @@ template< typename Real, Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); // IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); // // auto computeNonZeros = [&] /*__cuda_callable__*/ ( IndexType i ) mutable // { // std::cout << "matrixRow.getElementValue( i ) = " << matrixRow.getElementValue( i ) << " != 0.0" << std::endl; // if( matrixRow.getElementValue( i ) != 0.0 ) // elementCount++; // // std::cout << "End of lambda elementCount = " << elementCount << std::endl; // }; // // ParallelFor< DeviceType >::exec( ( IndexType ) 0, matrixRow.getLength(), computeNonZeros ); // return elementCount; } template< typename Real, Loading src/TNL/Matrices/Ellpack_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -129,7 +129,7 @@ template< typename Real, Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); } template< typename Real, Loading src/TNL/Matrices/SlicedEllpack_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -128,7 +128,7 @@ template< typename Real, Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); } template< typename Real, Loading src/TNL/Matrices/SparseRow.h +1 −1 Original line number Diff line number Diff line Loading @@ -53,7 +53,7 @@ class SparseRow Index getLength() const; __cuda_callable__ Index getNonZeroElementsCount() const; Index getNonZeroElementsCount( TNL::String deviceType ) const; void print( std::ostream& str ) const; Loading Loading
src/TNL/Matrices/CSR_impl.h +35 −8 Original line number Diff line number Diff line Loading @@ -136,8 +136,28 @@ template< typename Real, typename Index > Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); ConstMatrixRow matrixRow = this->getRow( row ); IndexType count = matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) ); // return count; // getRow() was throwing segmentation faults. // FOR THIS TO WORK, I had to change getRow() from [ rowIndex ] to .getElement( rowIndex ). // THE FOLLOWING throws: /home/lukas/tnl-dev/src/TNL/ParallelFor.h(92): error: identifier "" is undefined in device code // static IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); // // elementCount = 0; // Make sure it is reset. Without this seemingly useless step, it returned incorrect values. // // auto computeNonZeros = [matrixRow] __cuda_callable__ ( IndexType i ) mutable // { // if( matrixRow.getElementValue( i ) != 0.0 ) // elementCount++; // }; // // ParallelFor< DeviceType >::exec( (IndexType) 0, matrixRow.getLength(), computeNonZeros ); // // return elementCount; } template< typename Real, Loading Loading @@ -439,12 +459,19 @@ typename CSR< Real, Device, Index >::ConstMatrixRow CSR< Real, Device, Index >:: getRow( const IndexType rowIndex ) const { const IndexType rowOffset = this->rowPointers[ rowIndex ]; const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset; const IndexType rowOffset = this->rowPointers.getElement( rowIndex ); const IndexType rowLength = this->rowPointers.getElement( rowIndex + 1 ) - rowOffset; return ConstMatrixRow( &this->columnIndexes[ rowOffset ], &this->values[ rowOffset ], rowLength, 1 ); // const IndexType rowOffset = this->rowPointers[ rowIndex ]; // const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset; // return ConstMatrixRow( &this->columnIndexes[ rowOffset ], // &this->values[ rowOffset ], // rowLength, // 1 ); } template< typename Real, Loading
src/TNL/Matrices/ChunkedEllpack_impl.h +18 −3 Original line number Diff line number Diff line Loading @@ -198,8 +198,8 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( ConstCompressedRowLengthsV // will be zero and make the assertion fail ( https://stackoverflow.com/questions/33273359/in-c-using-the-ceil-a-division-is-not-working ). // To fix this, typecast them to ( float ), instead of ( RealType ) maxChunkInSlice = max( maxChunkInSlice, ceil( ( float ) rowLengths[ i ] / ( float ) this->rowToChunkMapping[ i ] ) ); ceil( ( double ) rowLengths[ i ] / ( double ) this->rowToChunkMapping[ i ] ) ); } TNL_ASSERT( maxChunkInSlice > 0, std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl ); Loading Loading @@ -314,7 +314,22 @@ template< typename Real, Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); // IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); // // auto computeNonZeros = [&] /*__cuda_callable__*/ ( IndexType i ) mutable // { // std::cout << "matrixRow.getElementValue( i ) = " << matrixRow.getElementValue( i ) << " != 0.0" << std::endl; // if( matrixRow.getElementValue( i ) != 0.0 ) // elementCount++; // // std::cout << "End of lambda elementCount = " << elementCount << std::endl; // }; // // ParallelFor< DeviceType >::exec( ( IndexType ) 0, matrixRow.getLength(), computeNonZeros ); // return elementCount; } template< typename Real, Loading
src/TNL/Matrices/Ellpack_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -129,7 +129,7 @@ template< typename Real, Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); } template< typename Real, Loading
src/TNL/Matrices/SlicedEllpack_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -128,7 +128,7 @@ template< typename Real, Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); return matrixRow.getNonZeroElementsCount(); return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); } template< typename Real, Loading
src/TNL/Matrices/SparseRow.h +1 −1 Original line number Diff line number Diff line Loading @@ -53,7 +53,7 @@ class SparseRow Index getLength() const; __cuda_callable__ Index getNonZeroElementsCount() const; Index getNonZeroElementsCount( TNL::String deviceType ) const; void print( std::ostream& str ) const; Loading