Implementing dense matrix assignment. (bb234945) · Commits · TNL / tnl-dev

src/TNL/Matrices/Dense.h

+23 −4

Original line number	Diff line number	Diff line
		@@ -167,12 +167,31 @@ class Dense : public Matrix< Real, Device, Index >
		Vector2& x,
		const RealType& omega = 1.0 ) const;

		// copy assignment
		/**
		* \brief Assignment operator for exactly the same type of the dense matrix.
		*
		* @param matrix
		* @return
		*/
		Dense& operator=( const Dense& matrix );

		// cross-device copy assignment
		template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ >
		Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix );
		/**
		* \brief Assignment operator for other dense matrices.
		*
		* @param matrix
		* @return
		*/
		template< typename RHSReal, typename RHSDevice, typename RHSIndex,
		bool RHSRowMajorOrder, typename RHSRealAllocator >
		Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix );

		/**
		* \brief Assignment operator for other (sparse) types of matrices.
		* @param matrix
		* @return
		*/
		template< typename RHSMatrix >
		Dense& operator=( const RHSMatrix& matrix );

		template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
		bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;

src/TNL/Matrices/Dense.hpp

+144 −76

Original line number	Diff line number	Diff line
		@@ -118,7 +118,7 @@ void
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
		setLike( const Matrix_& matrix )
		{
		Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
		this->setDimensions( matrix.getRows(), matrix.getColumns() );
		}

		template< typename Real,
		@@ -896,13 +896,50 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat
		{
		setLike( matrix );
		this->values = matrix.values;
		/*const IndexType bufferRowsCount( 128 );
		const IndexType columns = this->getColumns();
		const size_t bufferSize = bufferRowsCount * columns;
		Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
		Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
		auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
		auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
		return *this;
		}

		template< typename Real,
		typename Device,
		typename Index,
		bool RowMajorOrder,
		typename RealAllocator >
		template< typename RHSReal, typename RHSDevice, typename RHSIndex,
		bool RHSRowMajorOrder, typename RHSRealAllocator >
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
		operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix )
		{
		using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >;
		using RHSIndexType = typename RHSMatrix::IndexType;
		using RHSRealType = typename RHSMatrix::RealType;
		using RHSDeviceType = typename RHSMatrix::DeviceType;

		this->setLike( matrix );
		if( RowMajorOrder == RHSRowMajorOrder )
		{
		this->values = matrix.values;
		return *this;
		}

		auto this_view = this->view;
		if( std::is_same< DeviceType, RHSDeviceType >::value )
		{
		const auto segments_view = this->segments.getView();
		auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
		this_view( rowIdx, columnIdx ) = value;
		};
		matrix.forAllRows( f );
		}
		else
		{
		const IndexType maxRowLength = matrix.getColumns();
		const IndexType bufferRowsCount( 128 );
		const size_t bufferSize = bufferRowsCount * maxRowLength;
		Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize );
		Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize );
		auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
		auto thisValuesBuffer_view = thisValuesBuffer.getView();

		IndexType baseRow( 0 );
		const IndexType rowsCount = this->getRows();
		@@ -912,23 +949,28 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat

		////
		// Copy matrix elements into buffer
		auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		sourceValuesBuffer_view[ bufferIdx ] = value;
		auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx;
		matrixValuesBuffer_view[ bufferIdx ] = value;
		};
		matrix.forRows( baseRow, lastRow, f1 );
		destinationValuesBuffer = sourceValuesBuffer;

		////
		// Copy buffer to this matrix
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		value = destinationValuesBuffer_view[ bufferIdx ];
		// Copy the source matrix buffer to this matrix buffer
		thisValuesBuffer_view = matrixValuesBuffer_view;

		////
		// Copy matrix elements from the buffer to the matrix.
		auto this_view = this->view;
		auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable {
		IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx;
		this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
		};
		this->forRows( baseRow, lastRow, f2 );
		Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
		baseRow += bufferRowsCount;
		}
		return this;/
		}
		return *this;
		}

		template< typename Real,
		@@ -936,61 +978,87 @@ template< typename Real,
		typename Index,
		bool RowMajorOrder,
		typename RealAllocator >
		template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
		template< typename RHSMatrix >
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
		Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
		operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
		{
		this->setLike( matrix );
		if( RowMajorOrder == RowMajorOrder_ )
		this->values = matrix.getValues();
		else
		operator=( const RHSMatrix& matrix )
		{
		if( std::is_same< DeviceType, Device_ >::value )
		using RHSIndexType = typename RHSMatrix::IndexType;
		using RHSRealType = typename RHSMatrix::RealType;
		using RHSDeviceType = typename RHSMatrix::DeviceType;
		using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;

		Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
		matrix.getCompressedRowLengths( rowLengths );
		this->setDimensions( matrix.getRows(), matrix.getColumns() );

		// TODO: use getConstView when it works
		const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
		auto values_view = this->values.getView();
		RHSIndexType padding_index = matrix.getPaddingIndex();
		this->values = 0.0;

		if( std::is_same< DeviceType, RHSDeviceType >::value )
		{
		auto this_view = this->getView();
		auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
		this_view.getRow( rowIdx ).setElement( columnIdx, value );
		const auto segments_view = this->segments.getView();
		auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
		if( value != 0.0 && columnIdx != padding_index )
		values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value;
		};
		matrix.forAllRows( f );
		}
		else
		{
		const IndexType maxRowLength = max( rowLengths );
		const IndexType bufferRowsCount( 128 );
		const IndexType columns = this->getColumns();
		const size_t bufferSize = bufferRowsCount * columns;
		Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
		Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
		auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
		auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
		const size_t bufferSize = bufferRowsCount * maxRowLength;
		Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
		Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
		Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
		Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
		auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
		auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
		auto thisValuesBuffer_view = thisValuesBuffer.getView();
		auto thisColumnsBuffer_view = thisColumnsBuffer.getView();

		IndexType baseRow( 0 );
		const IndexType rowsCount = this->getRows();
		while( baseRow < rowsCount )
		{
		const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
		thisColumnsBuffer = padding_index;
		matrixColumnsBuffer_view = padding_index;

		////
		// Copy matrix elements into buffer
		auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		sourceValuesBuffer_view[ bufferIdx ] = value;
		auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
		if( columnIndex != padding_index )
		{
		const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
		matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
		matrixValuesBuffer_view[ bufferIdx ] = value;
		}
		};
		matrix.forRows( baseRow, lastRow, f1 );

		destinationValuesBuffer = sourceValuesBuffer;
		////
		// Copy the source matrix buffer to this matrix buffer
		thisValuesBuffer_view = matrixValuesBuffer_view;
		thisColumnsBuffer_view = matrixColumnsBuffer_view;

		////
		// Copy buffer to this matrix
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
		value = destinationValuesBuffer_view[ bufferIdx ];
		// Copy matrix elements from the buffer to the matrix
		auto this_view = this->view;
		auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable {
		IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx;
		IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ];
		if( columnIdx != padding_index )
		this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
		};
		this->forRows( baseRow, lastRow, f2 );
		Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
		baseRow += bufferRowsCount;
		}
		}
		}
		this->view = this->getView();
		return *this;
		}

src/TNL/Matrices/Multidiagonal.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -201,6 +201,9 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >

		IndexerType& getIndexer();

		__cuda_callable__
		IndexType getPaddingIndex() const;

		protected:

		__cuda_callable__

src/TNL/Matrices/Multidiagonal.hpp

+14 −0

Original line number	Diff line number	Diff line
		@@ -830,6 +830,20 @@ getElementIndex( const IndexType row, const IndexType column ) const
		return this->indexer.getGlobalIndex( row, localIdx );
		}

		template< typename Real,
		typename Device,
		typename Index,
		bool RowMajorOrder,
		typename RealAllocator,
		typename IndexAllocator >
		__cuda_callable__
		Index
		Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
		getPaddingIndex() const
		{
		return this->view.getPaddingIndex();
		}

		/*
		template<>
		class MultidiagonalDeviceDependentCode< Devices::Host >

src/TNL/Matrices/MultidiagonalMatrixView.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -163,6 +163,9 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
		__cuda_callable__
		IndexerType& getIndexer();

		__cuda_callable__
		IndexType getPaddingIndex() const;

		protected:

		__cuda_callable__