Loading src/TNL/Matrices/Dense.h +2 −1 Original line number Diff line number Diff line Loading @@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index > using RealType = Real; using DeviceType = Device; using IndexType = Index; using BaseType = Matrix< Real, Device, Index >; using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; Loading src/TNL/Matrices/Dense.hpp +91 −7 Original line number Diff line number Diff line Loading @@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, Loading @@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, Loading Loading @@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } /*template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) { const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); while( baseRow < rowsCount ) { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } return *this; }*/ template< typename Real, typename Device, typename Index, Loading @@ -968,16 +1010,58 @@ template< typename Real, Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { if( RowMajorOrder == RowMajorOrder_ ) { this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) this->values = matrix.getValues(); else { if( std::is_same< DeviceType, Device_ >::value ) { auto this_view = this->getView(); auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { this_view.getRow( rowIdx ).setElement( columnIdx, value ); }; matrix.forAllRows( f ); } else { const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); while( baseRow < rowsCount ) { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } } } return *this; } template< typename Real, Loading src/UnitTests/Matrices/DenseMatrixTest.h +50 −1 Original line number Diff line number Diff line Loading @@ -1169,9 +1169,51 @@ void test_PerformSORIteration() template< typename Matrix > void test_AssignmentOperator() { EXPECT_EQ( 1, 0 ); using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; const IndexType rows( 10 ), columns( 10 ); DenseHost hostMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) hostMatrix( i, j ) = i + j; Matrix matrix( rows, columns ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) { if( j > i ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else EXPECT_EQ( matrix.getElement( i, j ), i + j ); } #ifdef HAVE_CUDA DenseCuda cudaMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; matrix = cudaMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) { if( j > i ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else EXPECT_EQ( matrix.getElement( i, j ), i + j ); } #endif } template< typename Matrix > void test_SaveAndLoad() { Loading Loading @@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } TYPED_TEST( MatrixTest, assignmentOperatorTest ) { using MatrixType = typename TestFixture::MatrixType; test_AssignmentOperator< MatrixType >(); } TYPED_TEST( MatrixTest, saveAndLoadTest ) { using MatrixType = typename TestFixture::MatrixType; Loading Loading
src/TNL/Matrices/Dense.h +2 −1 Original line number Diff line number Diff line Loading @@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index > using RealType = Real; using DeviceType = Device; using IndexType = Index; using BaseType = Matrix< Real, Device, Index >; using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; Loading
src/TNL/Matrices/Dense.hpp +91 −7 Original line number Diff line number Diff line Loading @@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, Loading @@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, Loading Loading @@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } /*template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) { const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); while( baseRow < rowsCount ) { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } return *this; }*/ template< typename Real, typename Device, typename Index, Loading @@ -968,16 +1010,58 @@ template< typename Real, Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { if( RowMajorOrder == RowMajorOrder_ ) { this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) this->values = matrix.getValues(); else { if( std::is_same< DeviceType, Device_ >::value ) { auto this_view = this->getView(); auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { this_view.getRow( rowIdx ).setElement( columnIdx, value ); }; matrix.forAllRows( f ); } else { const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); while( baseRow < rowsCount ) { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); destinationValuesBuffer = sourceValuesBuffer; //// // Copy buffer to this matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; }; this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } } } return *this; } template< typename Real, Loading
src/UnitTests/Matrices/DenseMatrixTest.h +50 −1 Original line number Diff line number Diff line Loading @@ -1169,9 +1169,51 @@ void test_PerformSORIteration() template< typename Matrix > void test_AssignmentOperator() { EXPECT_EQ( 1, 0 ); using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; const IndexType rows( 10 ), columns( 10 ); DenseHost hostMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) hostMatrix( i, j ) = i + j; Matrix matrix( rows, columns ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) { if( j > i ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else EXPECT_EQ( matrix.getElement( i, j ), i + j ); } #ifdef HAVE_CUDA DenseCuda cudaMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; matrix = cudaMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) { if( j > i ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else EXPECT_EQ( matrix.getElement( i, j ), i + j ); } #endif } template< typename Matrix > void test_SaveAndLoad() { Loading Loading @@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } TYPED_TEST( MatrixTest, assignmentOperatorTest ) { using MatrixType = typename TestFixture::MatrixType; test_AssignmentOperator< MatrixType >(); } TYPED_TEST( MatrixTest, saveAndLoadTest ) { using MatrixType = typename TestFixture::MatrixType; Loading