Implemented dense matrix assignment operator. (74c5c158) · Commits · TNL / tnl-dev

src/TNL/Matrices/Dense.h

+2 −1

Original line number	Diff line number	Diff line
		@@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index >
		using RealType = Real;
		using DeviceType = Device;
		using IndexType = Index;
		using BaseType = Matrix< Real, Device, Index >;
		using RealAllocatorType = RealAllocator;
		using BaseType = Matrix< Real, Device, Index, RealAllocator >;
		using ValuesType = typename BaseType::ValuesVector;
		using ValuesViewType = typename ValuesType::ViewType;
		using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;

src/TNL/Matrices/Dense.hpp

+91 −7

Original line number	Diff line number	Diff line
		@@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const
		return true;
		};
		this->segments.forSegments( first, last, f );

		}

		template< typename Real,
		@@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function )
		{
		auto values_view = this->values.getView();
		auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
		function( rowIdx, columnIdx, values_view[ globalIdx ] );
		function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
		return true;
		};
		this->segments.forSegments( first, last, f );

		}

		template< typename Real,
		@@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
		x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
		}

		/*template< typename Real,
		typename Device,
		typename Index,
		bool RowMajorOrder,
		typename RealAllocator >
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
		operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix )
		{
		const IndexType bufferRowsCount( 128 );
		const IndexType columns = this->getColumns();
		const size_t bufferSize = bufferRowsCount * columns;
		Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
		Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
		auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
		auto destinationValuesBuffer_view = destinationValuesBuffer.getView();

		IndexType baseRow( 0 );
		const IndexType rowsCount = this->getRows();
		while( baseRow < rowsCount )
		{
		const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );

		////
		// Copy matrix elements into buffer
		auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		sourceValuesBuffer_view[ bufferIdx ] = value;
		};
		matrix.forRows( baseRow, lastRow, f1 );
		destinationValuesBuffer = sourceValuesBuffer;

		////
		// Copy buffer to this matrix
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		value = destinationValuesBuffer_view[ bufferIdx ];
		};
		this->forRows( baseRow, lastRow, f2 );
		baseRow += bufferRowsCount;
		}
		return *this;
		}*/

		template< typename Real,
		typename Device,
		typename Index,
		@@ -968,16 +1010,58 @@ template< typename Real,
		// Assignment from another dense matrix whose template arguments
		// (Real_, Device_, Index_, RowMajorOrder_, RealAllocator_) may differ from
		// the ones of this matrix. Three strategies are visible below:
		//   1. same element ordering      -> the values vector is assigned as a whole,
		//   2. different ordering, same device type
		//                                 -> element-wise copy through a matrix view,
		//   3. different ordering, different device type
		//                                 -> rows are staged through two buffers in
		//                                    chunks of at most 128 rows.
		// Returns a reference to this matrix.
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
		operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
		{
		// NOTE(review): this `if` / `{` pair looks like the pre-change side of the
		// diff hunk (the same condition is tested again two lines below and the
		// braces of this listing do not balance with it) - confirm against the
		// actual file before relying on this listing.
		if( RowMajorOrder == RowMajorOrder_ )
		{
		// presumably adopts the dimensions of `matrix` - verify against setLike().
		this->setLike( matrix );
		// Same element ordering: assign the whole values vector at once.
		if( RowMajorOrder == RowMajorOrder_ )
		this->values = matrix.getValues();
		else
		{
		// Different element ordering: elements have to be placed one by one.
		if( std::is_same< DeviceType, Device_ >::value )
		{
		// Same device type: write each source element into this matrix
		// through a view captured by value in the lambda.
		auto this_view = this->getView();
		auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
		this_view.getRow( rowIdx ).setElement( columnIdx, value );
		};
		matrix.forAllRows( f );
		}
		else
		{
		// Different device types: process the matrix in chunks of
		// bufferRowsCount rows using one buffer per device type.
		const IndexType bufferRowsCount( 128 );
		const IndexType columns = this->getColumns();
		const size_t bufferSize = bufferRowsCount * columns;
		// NOTE(review): the source-side buffer stores RealType but is given
		// RealAllocator_, the allocator type of the source matrix - confirm
		// this is valid when Real_ differs from RealType.
		Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
		Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
		auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
		auto destinationValuesBuffer_view = destinationValuesBuffer.getView();

		IndexType baseRow( 0 );
		const IndexType rowsCount = this->getRows();
		while( baseRow < rowsCount )
		{
		// The last chunk may contain fewer than bufferRowsCount rows.
		const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );

		////
		// Copy matrix elements into buffer
		// (the buffer index is chunk-relative: row offset times columns plus column)
		auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		sourceValuesBuffer_view[ bufferIdx ] = value;
		};
		matrix.forRows( baseRow, lastRow, f1 );

		// Move the chunk from the source-side buffer to the destination-side
		// buffer by vector assignment.
		destinationValuesBuffer = sourceValuesBuffer;

		////
		// Copy buffer to this matrix
		// (same chunk-relative index as used when filling the buffer)
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		value = destinationValuesBuffer_view[ bufferIdx ];
		};
		this->forRows( baseRow, lastRow, f2 );
		baseRow += bufferRowsCount;
		}
		}
		}
		return *this;
		}

		template< typename Real,

src/UnitTests/Matrices/DenseMatrixTest.h

+50 −1

Original line number	Diff line number	Diff line
		@@ -1169,9 +1169,51 @@ void test_PerformSORIteration()
		// Checks Dense::operator= for assignment from a host dense matrix and, when
		// CUDA is enabled, from a CUDA dense matrix.
		//
		// The source matrix gets element (i, j) = i + j for j <= i; elements above
		// the diagonal are never written by the test, and the destination is
		// expected to read 0 there after the assignment.
		template< typename Matrix >
		void test_AssignmentOperator()
		{
		   using RealType = typename Matrix::RealType;
		   using IndexType = typename Matrix::IndexType;

		   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
		   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;

		   const IndexType rows( 10 ), columns( 10 );

		   Matrix matrix( rows, columns );

		   // Verifies that `matrix` holds i + j on and below the diagonal and 0 above.
		   // The row index is bounded by `rows` and the column index by `columns`.
		   auto expectLowerTriangle = [&] () {
		      for( IndexType i = 0; i < rows; i++ )
		         for( IndexType j = 0; j < columns; j++ )
		         {
		            if( j > i )
		               EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
		            else
		               EXPECT_EQ( matrix.getElement( i, j ), i + j );
		         }
		   };

		   // Assignment from a host matrix.
		   DenseHost hostMatrix( rows, columns );
		   for( IndexType i = 0; i < rows; i++ )
		      for( IndexType j = 0; j <= i && j < columns; j++ )
		         hostMatrix( i, j ) = i + j;

		   matrix.getValues() = 0.0;
		   matrix = hostMatrix;
		   expectLowerTriangle();

		#ifdef HAVE_CUDA
		   // Assignment from a CUDA matrix; elements are set one by one from the host.
		   DenseCuda cudaMatrix( rows, columns );
		   for( IndexType i = 0; i < rows; i++ )
		      for( IndexType j = 0; j <= i && j < columns; j++ )
		         cudaMatrix.setElement( i, j, i + j );

		   matrix.getValues() = 0.0;
		   matrix = cudaMatrix;
		   expectLowerTriangle();
		#endif
		}


		template< typename Matrix >
		void test_SaveAndLoad()
		{
		@@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest )
		test_AddMatrix< MatrixType >();
		}

		// Runs the assignment-operator check for the matrix type of the current
		// typed-test fixture.
		TYPED_TEST( MatrixTest, assignmentOperatorTest )
		{
		   test_AssignmentOperator< typename TestFixture::MatrixType >();
		}

		TYPED_TEST( MatrixTest, saveAndLoadTest )
		{
		using MatrixType = typename TestFixture::MatrixType;