diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 757fa4eae4fcc34d5fa0e8752dd8f658aaf66daa..9c05297d152f6809a140edbbe302dcf3e394f0e2 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index > using RealType = Real; using DeviceType = Device; using IndexType = Index; - using BaseType = Matrix< Real, Device, Index >; + using RealAllocatorType = RealAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 7517c6b0eaadd31a1242f88ac2371ed1b304c2a2..7a6c4becc383e4398813b1c8d4b1645ad973ad7a 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const return true; }; this->segments.forSegments( first, last, f ); - } template< typename Real, @@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, values_view[ globalIdx ] ); + function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); - } template< typename Real, @@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } +/*template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) +{ + const IndexType bufferRowsCount( 128 ); + const IndexType columns = this->getColumns(); + const size_t bufferSize = bufferRowsCount * columns; + Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); + auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); + auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + sourceValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + destinationValuesBuffer = sourceValuesBuffer; + + //// + // Copy buffer to this matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + value = destinationValuesBuffer_view[ bufferIdx ]; + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + return *this; +}*/ + template< typename Real, typename Device, typename Index, @@ -969,15 +1011,57 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { + this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) - { - this->setLike( matrix ); this->values = matrix.getValues(); - } else { - + if( std::is_same< DeviceType, Device_ >::value ) + { + auto this_view = this->getView(); + auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { + this_view.getRow( rowIdx ).setElement( columnIdx, value ); + }; + matrix.forAllRows( f ); + } + else + { + const IndexType bufferRowsCount( 128 ); + const IndexType columns = this->getColumns(); + const size_t bufferSize = bufferRowsCount * columns; + Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); + auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); + auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + sourceValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + destinationValuesBuffer = sourceValuesBuffer; + + //// + // Copy buffer to this matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + value = destinationValuesBuffer_view[ bufferIdx ]; + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + } } + return *this; } template< typename Real, diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 2ddd19c7af3fa4eab6941a3ef25b1dd777acf67b..686602ebd10a33d7f73185cabbd274fe09ec1669 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -1169,9 +1169,51 @@ void test_PerformSORIteration() template< typename Matrix > void test_AssignmentOperator() { - EXPECT_EQ( 1, 0 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix( rows, columns ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif } + template< typename Matrix > void test_SaveAndLoad() { @@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } +TYPED_TEST( MatrixTest, assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + TYPED_TEST( MatrixTest, saveAndLoadTest ) { using MatrixType = typename TestFixture::MatrixType;