Loading src/TNL/Matrices/Dense.h +23 −4 Original line number Diff line number Diff line Loading @@ -167,12 +167,31 @@ class Dense : public Matrix< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; // copy assignment /** * \brief Assignment operator for exactly the same type of the dense matrix. * * @param matrix * @return */ Dense& operator=( const Dense& matrix ); // cross-device copy assignment template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix ); /** * \brief Assignment operator for other dense matrices. * * @param matrix * @return */ template< typename RHSReal, typename RHSDevice, typename RHSIndex, bool RHSRowMajorOrder, typename RHSRealAllocator > Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ); /** * \brief Assignment operator for other (sparse) types of matrices. * @param matrix * @return */ template< typename RHSMatrix > Dense& operator=( const RHSMatrix& matrix ); template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; Loading src/TNL/Matrices/Dense.hpp +144 −76 Original line number Diff line number Diff line Loading @@ -118,7 +118,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: setLike( const Matrix_& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); } template< typename Real, Loading Loading @@ -896,13 +896,50 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat { setLike( matrix ); this->values = matrix.values; /*const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); return *this; } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename RHSReal, typename RHSDevice, typename RHSIndex, bool RHSRowMajorOrder, typename RHSRealAllocator > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ) { using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >; using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; this->setLike( matrix ); if( RowMajorOrder == RHSRowMajorOrder ) { this->values = matrix.values; return *this; } auto this_view = this->view; if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { this_view( rowIdx, columnIdx ) = value; }; matrix.forAllRows( f ); } else { const IndexType maxRowLength = matrix.getColumns(); const IndexType bufferRowsCount( 128 ); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); Loading @@ -912,23 +949,28 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx; matrixValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; // Copy the source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; //// // Copy matrix elements from the buffer to the matrix. auto this_view = this->view; auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable { IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx; this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); baseRow += bufferRowsCount; } return *this;*/ } return *this; } template< typename Real, Loading @@ -936,61 +978,87 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > template< typename RHSMatrix > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) this->values = matrix.getValues(); else operator=( const RHSMatrix& matrix ) { if( std::is_same< DeviceType, Device_ >::value ) using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); auto values_view = this->values.getView(); RHSIndexType padding_index = matrix.getPaddingIndex(); this->values = 0.0; if( std::is_same< DeviceType, RHSDeviceType >::value ) { auto this_view = this->getView(); auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { this_view.getRow( rowIdx ).setElement( columnIdx, value ); const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { if( value != 0.0 && columnIdx != padding_index ) values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value; }; matrix.forAllRows( f ); } else { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); while( baseRow < rowsCount ) { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); thisColumnsBuffer = padding_index; matrixColumnsBuffer_view = padding_index; //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { if( columnIndex != padding_index ) { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; matrixValuesBuffer_view[ bufferIdx ] = value; } }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy the source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; thisColumnsBuffer_view = matrixColumnsBuffer_view; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; // Copy matrix elements from the buffer to the matrix auto this_view = this->view; auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable { IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx; IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ]; if( columnIdx != padding_index ) this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); baseRow += bufferRowsCount; } } } this->view = this->getView(); return *this; } Loading src/TNL/Matrices/Multidiagonal.h +3 −0 Original line number Diff line number Diff line Loading @@ -201,6 +201,9 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexerType& getIndexer(); __cuda_callable__ IndexType getPaddingIndex() const; protected: __cuda_callable__ Loading src/TNL/Matrices/Multidiagonal.hpp +14 −0 Original line number Diff line number Diff line Loading @@ -830,6 +830,20 @@ getElementIndex( const IndexType row, const IndexType column ) const return this->indexer.getGlobalIndex( row, localIdx ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return this->view.getPaddingIndex(); } /* template<> class MultidiagonalDeviceDependentCode< Devices::Host > Loading src/TNL/Matrices/MultidiagonalMatrixView.h +3 −0 Original line number Diff line number Diff line Loading @@ -163,6 +163,9 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ IndexerType& getIndexer(); __cuda_callable__ IndexType getPaddingIndex() const; protected: __cuda_callable__ Loading Loading
src/TNL/Matrices/Dense.h +23 −4 Original line number Diff line number Diff line Loading @@ -167,12 +167,31 @@ class Dense : public Matrix< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; // copy assignment /** * \brief Assignment operator for exactly the same type of the dense matrix. * * @param matrix * @return */ Dense& operator=( const Dense& matrix ); // cross-device copy assignment template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix ); /** * \brief Assignment operator for other dense matrices. * * @param matrix * @return */ template< typename RHSReal, typename RHSDevice, typename RHSIndex, bool RHSRowMajorOrder, typename RHSRealAllocator > Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ); /** * \brief Assignment operator for other (sparse) types of matrices. * @param matrix * @return */ template< typename RHSMatrix > Dense& operator=( const RHSMatrix& matrix ); template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; Loading
src/TNL/Matrices/Dense.hpp +144 −76 Original line number Diff line number Diff line Loading @@ -118,7 +118,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: setLike( const Matrix_& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); } template< typename Real, Loading Loading @@ -896,13 +896,50 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat { setLike( matrix ); this->values = matrix.values; /*const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); return *this; } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename RHSReal, typename RHSDevice, typename RHSIndex, bool RHSRowMajorOrder, typename RHSRealAllocator > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ) { using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >; using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; this->setLike( matrix ); if( RowMajorOrder == RHSRowMajorOrder ) { this->values = matrix.values; return *this; } auto this_view = this->view; if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { this_view( rowIdx, columnIdx ) = value; }; matrix.forAllRows( f ); } else { const IndexType maxRowLength = matrix.getColumns(); const IndexType bufferRowsCount( 128 ); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); Loading @@ -912,23 +949,28 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx; matrixValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; // Copy the source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; //// // Copy matrix elements from the buffer to the matrix. auto this_view = this->view; auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable { IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx; this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); baseRow += bufferRowsCount; } return *this;*/ } return *this; } template< typename Real, Loading @@ -936,61 +978,87 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > template< typename RHSMatrix > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) this->values = matrix.getValues(); else operator=( const RHSMatrix& matrix ) { if( std::is_same< DeviceType, Device_ >::value ) using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); auto values_view = this->values.getView(); RHSIndexType padding_index = matrix.getPaddingIndex(); this->values = 0.0; if( std::is_same< DeviceType, RHSDeviceType >::value ) { auto this_view = this->getView(); auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { this_view.getRow( rowIdx ).setElement( columnIdx, value ); const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { if( value != 0.0 && columnIdx != padding_index ) values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value; }; matrix.forAllRows( f ); } else { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); while( baseRow < rowsCount ) { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); thisColumnsBuffer = padding_index; matrixColumnsBuffer_view = padding_index; //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { if( columnIndex != padding_index ) { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; matrixValuesBuffer_view[ bufferIdx ] = value; } }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy the source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; thisColumnsBuffer_view = matrixColumnsBuffer_view; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; // Copy matrix elements from the buffer to the matrix auto this_view = this->view; auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable { IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx; IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ]; if( columnIdx != padding_index ) this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); baseRow += bufferRowsCount; } } } this->view = this->getView(); return *this; } Loading
src/TNL/Matrices/Multidiagonal.h +3 −0 Original line number Diff line number Diff line Loading @@ -201,6 +201,9 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexerType& getIndexer(); __cuda_callable__ IndexType getPaddingIndex() const; protected: __cuda_callable__ Loading
src/TNL/Matrices/Multidiagonal.hpp +14 −0 Original line number Diff line number Diff line Loading @@ -830,6 +830,20 @@ getElementIndex( const IndexType row, const IndexType column ) const return this->indexer.getGlobalIndex( row, localIdx ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return this->view.getPaddingIndex(); } /* template<> class MultidiagonalDeviceDependentCode< Devices::Host > Loading
src/TNL/Matrices/MultidiagonalMatrixView.h +3 −0 Original line number Diff line number Diff line Loading @@ -163,6 +163,9 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ IndexerType& getIndexer(); __cuda_callable__ IndexType getPaddingIndex() const; protected: __cuda_callable__ Loading