diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h index f274200b223fce4f7b369443bf71446677e6ee20..97b046fba6d37ba1a1633e6aad3c29c77745f8c4 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/CSR.h @@ -63,6 +63,9 @@ class CSR : public Sparse< Real, Device, Index > IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + template< typename Real2, typename Device2, typename Index2 > void setLike( const CSR< Real2, Device2, Index2 >& matrix ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 3563fa789e21554f819ecf3183a0092377893e0c..abc4e282312549d2d955c9d682dc3e757766ecb4 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -117,6 +117,15 @@ template< typename Real, typename Device, typename Index > Index CSR< Real, Device, Index >::getRowLength( const IndexType row ) const +{ + return this->rowPointers.getElement( row + 1 ) - this->rowPointers.getElement( row ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const { return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; } diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h index dace4893cc3225c8f5d431a3097c4c16e76055ea..c86081216eb5e180664ae8e32fb4e3217b8c23db 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/ChunkedEllpack.h @@ -92,6 +92,9 @@ class ChunkedEllpack : public Sparse< Real, Device, Index > IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + template< typename Real2, typename Device2, typename Index2 > void setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix ); diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 3840384b90fe6ecb4951df6ddc4b7dd45d7f90a4..bd94540b79e43bff987e4898b06e063cd233716d 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -262,9 +262,21 @@ template< typename Real, typename Index > Index ChunkedEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const { - const IndexType& sliceIndex = rowToSliceMapping[ row ]; + const IndexType& sliceIndex = rowToSliceMapping.getElement( row ); TNL_ASSERT( sliceIndex < this->rows, ); const IndexType& chunkSize = slices.getElement( sliceIndex ).chunkSize; + return rowPointers.getElement( row + 1 ) - rowPointers.getElement( row ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index ChunkedEllpack< Real, Device, Index >::getRowLengthFast( const IndexType row ) const +{ + const IndexType& sliceIndex = rowToSliceMapping[ row ]; + TNL_ASSERT( sliceIndex < this->rows, ); + const IndexType& chunkSize = slices[ sliceIndex ].chunkSize; return rowPointers[ row + 1 ] - rowPointers[ row ]; } diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index bd5daa284dd716dae3462cfab6e66edee37a4820..5d94b5bda6c711a9c16e4e135285b8cf9677a886 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -66,6 +66,9 @@ class Dense : public Matrix< Real, Device, Index > */ IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + IndexType getMaxRowLength() const; IndexType getNumberOfMatrixElements() const; diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h index 55282e8d6228d246e547de5e5f5623cbf327eddf..08f66239c3b83a4ae60ee228a11a928a44f75445 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense_impl.h @@ -99,6 +99,15 @@ Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const return this->getColumns(); } +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const +{ + return this->getColumns(); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h index 65f06a550ecab5765f64b9e39e7a28617fcb9808..4e1b92b9497d4fca7b1736261b700272813a427c 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Ellpack.h @@ -56,6 +56,9 @@ class Ellpack : public Sparse< Real, Device, Index > IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + template< typename Real2, typename Device2, typename Index2 > void setLike( const Ellpack< Real2, Device2, Index2 >& matrix ); diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index 9f12aee4c9393ad3560f2dc7e07929f5844a56ac..ca23f4168d10b8ce73eeac1aca3996028a95c8c8 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -113,6 +113,15 @@ Index Ellpack< Real, Device, Index >::getRowLength( const IndexType row ) const return this->rowLengths; } +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index Ellpack< Real, Device, Index >::getRowLengthFast( const IndexType row ) const +{ + return this->rowLengths; +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 058c05217efe04f0feb3a558d8f6eff3920ab6d6..884e804ec30a5c4948cd9ee03cc710d571eae838 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -53,6 +53,9 @@ class Multidiagonal : public Matrix< Real, Device, Index > IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + IndexType getMaxRowLength() const; template< typename Vector > diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h index 91588a515e9f12c4b3e6849b569e9a9c689202d2..ea8c4050db60117af1cc720088d520a9ca849187 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Multidiagonal_impl.h @@ -105,6 +105,22 @@ Index Multidiagonal< Real, Device, Index >::getRowLength( const IndexType row ) return rowLength; } +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index Multidiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const +{ + IndexType rowLength( 0 ); + for( IndexType i = 0; i < diagonalsShift.getSize(); i++ ) + { + const IndexType column = row + diagonalsShift[ i ]; + if( column >= 0 && column < this->getColumns() ) + rowLength++; + } + return rowLength; +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h index ef1d542bf30d9402a2d96b06eb48354ca990d813..2bd6e762d7be611180fbf5d3c77d22ed8910622b 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ b/src/TNL/Matrices/SlicedEllpack.h @@ -84,6 +84,9 @@ class SlicedEllpack : public Sparse< Real, Device, Index > IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + template< typename Real2, typename Device2, typename Index2 > void setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix ); diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 0f22e69b9c52fa6cb9336cff16a9328de68c5d32..bcbed7d986038afae15e7ffd3de330ccfca7c7a1 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -108,6 +108,17 @@ Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLength( const Index return this->sliceCompressedRowLengths.getElement( slice ); } +template< typename Real, + typename Device, + typename Index, + int SliceSize > +__cuda_callable__ +Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLengthFast( const IndexType row ) const +{ + const IndexType slice = row / SliceSize; + return this->sliceCompressedRowLengths[ slice ]; +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 396ea25192e02195ca973516d8ff64dd4c8335ea..5ebd526231d52e8fd9f6f69479f499af8158b797 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -54,6 +54,9 @@ class Tridiagonal : public Matrix< Real, Device, Index > IndexType getRowLength( const IndexType row ) const; + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + IndexType getMaxRowLength() const; template< typename Real2, typename Device2, typename Index2 > diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h index c1239627da2d8d8dd7351a75b9b77f6c8fdb6b5d..fb070c7f09c06482e1a8883a4da5a41e1eb1bdf0 100644 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ b/src/TNL/Matrices/Tridiagonal_impl.h @@ -98,6 +98,15 @@ template< typename Real, typename Device, typename Index > Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const +{ + return this->getRowLengthFast( row ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const { const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); if( row == 0 )