Deleted DenseMatrix[View]::getRowVectorProduct and added rows range to vectorProduct method. (5f98eede) · Commits · TNL / tnl-dev

src/TNL/Matrices/DenseMatrix.h

+17 −25

Original line number	Diff line number	Diff line
		@@ -331,9 +331,10 @@ class DenseMatrix : public Matrix< Real, Device, Index >
		/**
		* \brief Sets element at given \e row and \e column to given \e value.
		*
		* This method can be called only from the host system (CPU) no matter
		* where the matrix is allocated. If the matrix is allocated in GPU device
		* this methods transfer values of each matrix element separately and so the
		* This method can be called from the host system (CPU) no matter
		* where the matrix is allocated. If the matrix is allocated on the GPU, this method
		* can also be called from device kernels. If the matrix is allocated on the GPU and
		* this method is called from the CPU, it transfers the value of each matrix element separately and so the
		* performance is very low. For higher performance see \ref DenseMatrix::getRow
		* or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
		*
		@@ -354,9 +355,10 @@ class DenseMatrix : public Matrix< Real, Device, Index >
		/**
		* \brief Add element at given \e row and \e column to given \e value.
		*
		* This method can be called only from the host system (CPU) no matter
		* where the matrix is allocated. If the matrix is allocated in GPU device
		* this methods transfer values of each matrix element separately and so the
		* This method can be called from the host system (CPU) no matter
		* where the matrix is allocated. If the matrix is allocated on the GPU, this method
		* can also be called from device kernels. If the matrix is allocated on the GPU and
		* this method is called from the CPU, it transfers the value of each matrix element separately and so the
		* performance is very low. For higher performance see \ref DenseMatrix::getRow
		* or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
		*
		@@ -375,9 +377,10 @@ class DenseMatrix : public Matrix< Real, Device, Index >
		/**
		* \brief Returns value of matrix element at position given by its row and column index.
		*
		* This method can be called only from the host system (CPU) no matter
		* where the matrix is allocated. If the matrix is allocated in GPU device
		* this methods transfer values of each matrix element separately and so the
		* This method can be called from the host system (CPU) no matter
		* where the matrix is allocated. If the matrix is allocated on the GPU, this method
		* can also be called from device kernels. If the matrix is allocated on the GPU and
		* this method is called from the CPU, it transfers the value of each matrix element separately and so the
		* performance is very low. For higher performance see \ref DenseMatrix::getRow
		* or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
		*
		@@ -518,21 +521,6 @@ class DenseMatrix : public Matrix< Real, Device, Index >
		template< typename Function >
		void forAllRows( Function& function );

		/**
		* \brief Computes the scalar product of a given vector and one
		* row of the matrix.
		*
		* \tparam Vector is the type of the input vector. It can be \ref Vector,
		* \ref VectorView, \ref Array, \ref ArrayView or a similar container.
		* The return type is taken from its nested \e RealType.
		* \param row is the index of the row used for the scalar product.
		* \param vector is the input vector.
		* \return result of the matrix row and vector product.
		*
		* NOTE(review): the definition forwards to the matrix view, and the view
		* implementation visible in this change has its summation loop commented
		* out (marked "TODO: Fix this"), so it returns zero.
		*/
		template< typename Vector >
		__cuda_callable__
		typename Vector::RealType rowVectorProduct( const IndexType row,
		const Vector& vector ) const;

		/**
		* \brief Computes product of matrix and vector.
		*
		@@ -546,7 +534,11 @@ class DenseMatrix : public Matrix< Real, Device, Index >
		*/
		template< typename InVector, typename OutVector >
		void vectorProduct( const InVector& inVector,
		OutVector& outVector ) const;
		OutVector& outVector,
		const RealType& matrixMultiplicator = 1.0,
		const RealType& outVectorMultiplicator = 0.0,
		const IndexType firstRow = 0,
		const IndexType lastRow = 0 ) const;

		template< typename Matrix >
		void addMatrix( const Matrix& matrix,

src/TNL/Matrices/DenseMatrix.hpp

+7 −16

Original line number	Diff line number	Diff line
		@@ -416,20 +416,6 @@ forAllRows( Function& function )
		this->forRows( 0, this->getRows(), function );
		}

		/**
		* Scalar product of one matrix row with a vector.
		*
		* The matrix does no work of its own here: both arguments are handed
		* unchanged to the rowVectorProduct method of the view stored in
		* this->view, and whatever the view returns is returned to the caller.
		*
		* \param row index of the matrix row.
		* \param vector the input vector.
		* \return the value produced by the view for \e row and \e vector.
		*/
		template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator >
		template< typename Vector >
		__cuda_callable__
		typename Vector::RealType
		DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::
		rowVectorProduct( const IndexType row, const Vector& vector ) const
		{
		const auto& matrixView = this->view;
		return matrixView.rowVectorProduct( row, vector );
		}

		// DenseMatrix::vectorProduct - thin wrapper around the matrix view.
		//
		// All arguments are passed unchanged to this->view.vectorProduct(); the
		// matrix itself performs no checks or arithmetic here. Two forms appear
		// below: the two-argument one ( inVector, outVector ) and the extended one
		// that additionally takes matrixMultiplicator, outVectorMultiplicator,
		// firstRow and lastRow. The meaning of these extra parameters is defined
		// entirely by the view's implementation.
		template< typename Real,
		typename Device,
		typename Index,
		@@ -439,9 +425,14 @@ template< typename Real,
		typename OutVector >
		void
		DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::
		vectorProduct( const InVector& inVector, OutVector& outVector ) const
		vectorProduct( const InVector& inVector,
		OutVector& outVector,
		const RealType& matrixMultiplicator,
		const RealType& outVectorMultiplicator,
		const IndexType firstRow,
		const IndexType lastRow ) const
		{
		// Two-argument forwarding call.
		this->view.vectorProduct( inVector, outVector );
		// Extended forwarding call: multiplicators and row range are passed through as given.
		this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow );
		}

		template< typename Real,

src/TNL/Matrices/DenseMatrixView.h

+5 −6

Original line number	Diff line number	Diff line
		@@ -141,14 +141,13 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
		template< typename Function >
		void forAllRows( Function& function );

		/**
		* \brief Product of one matrix row and a vector.
		*
		* \tparam Vector is the type of the input vector; the return type is its
		* nested \e RealType.
		* \param row is the index of the matrix row.
		* \param vector is the input vector.
		* \return value of type \e Vector::RealType.
		*
		* NOTE(review): the implementation visible in DenseMatrixView.hpp has its
		* summation loop commented out ("TODO: Fix this") and returns zero.
		*/
		template< typename Vector >
		__cuda_callable__
		typename Vector::RealType rowVectorProduct( const IndexType row,
		const Vector& vector ) const;

		/**
		* \brief Computes product of the matrix and a vector.
		*
		* In the extended form, the implementation stores for each processed row
		*
		*    outVector[ row ] = matrixMultiplicator * s + outVectorMultiplicator * outVector[ row ]
		*
		* where \e s is the value reduced (with addition) from the products of
		* matrix values and the matching \e inVector entries - presumably the
		* dot product of the row with \e inVector; confirm against the segments
		* implementation.
		*
		* \param inVector is the input vector; its size is asserted to equal the number of matrix columns.
		* \param outVector is the output vector; its size is asserted to equal the number of matrix rows.
		* \param matrixMultiplicator is the factor applied to the reduced row value.
		* \param outVectorMultiplicator is the factor applied to the previous content of \e outVector.
		* \param firstRow is the beginning of the row range passed to the segments reduction.
		* \param lastRow is the end of the row range passed to the segments reduction
		*        (presumably exclusive - TODO confirm). The value 0 is replaced by the
		*        number of matrix rows.
		*
		* NOTE(review): \e lastRow is taken by non-const value here, while the
		* DenseMatrix declaration takes it as \e const \e IndexType.
		*/
		template< typename InVector, typename OutVector >
		void vectorProduct( const InVector& inVector,
		OutVector& outVector ) const;
		OutVector& outVector,
		const RealType& matrixMultiplicator = 1.0,
		const RealType& outVectorMultiplicator = 0.0,
		const IndexType firstRow = 0,
		IndexType lastRow = 0 ) const;

		template< typename Matrix >
		void addMatrix( const Matrix& matrix,

src/TNL/Matrices/DenseMatrixView.hpp

+10 −20

Original line number	Diff line number	Diff line
		@@ -367,23 +367,6 @@ forAllRows( Function& function )
		this->forRows( 0, this->getRows(), function );
		}

		/**
		* Placeholder for the product of one matrix row with a vector.
		*
		* The summation is currently disabled, so neither \e row nor \e vector is
		* read and the result is always a zero-initialized RealType converted to
		* Vector::RealType.
		*/
		template< typename Real, typename Device, typename Index, bool RowMajorOrder >
		template< typename Vector >
		__cuda_callable__
		typename Vector::RealType
		DenseMatrixView< Real, Device, Index, RowMajorOrder >::
		rowVectorProduct( const IndexType row, const Vector& vector ) const
		{
		// TODO: Fix this - the accumulation below is switched off:
		//   for( IndexType column = 0; column < this->getColumns(); column++ )
		//      rowSum += this->getElementFast( row, column ) * vector[ column ];
		RealType rowSum( 0.0 );
		return rowSum;
		}

		// DenseMatrixView::vectorProduct - matrix-vector product through a
		// segments reduction.
		//
		// fetch  : returns valuesView[ offset ] * inVectorView[ column ] for one stored element.
		// reduce : std::plus<>, starting from ( RealType ) 0.0.
		// keeper : stores the reduced value for a row into outVectorView[ row ].
		//
		// In the extended form the keeper writes
		//    matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]
		// and the reduction runs over the range ( firstRow, lastRow ) instead of
		// ( 0, getRows() ). Whether lastRow is exclusive is decided by
		// segmentsReduction - presumably yes, since the default is replaced by
		// getRows(); TODO confirm.
		template< typename Real,
		typename Device,
		typename Index,
		@@ -392,7 +375,12 @@ template< typename Real,
		typename OutVector >
		void
		DenseMatrixView< Real, Device, Index, RowMajorOrder >::
		vectorProduct( const InVector& inVector, OutVector& outVector ) const
		vectorProduct( const InVector& inVector,
		OutVector& outVector,
		const RealType& matrixMultiplicator,
		const RealType& outVectorMultiplicator,
		const IndexType firstRow,
		IndexType lastRow ) const
		{
		// Both vectors are checked against the full matrix dimensions, independently of
		// the requested row range; outVector is indexed by the global row index below.
		TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." );
		TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." );
		@@ -400,13 +388,15 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const
		const auto inVectorView = inVector.getConstView();
		auto outVectorView = outVector.getView();
		const auto valuesView = this->values.getConstView();
		// lastRow == 0 acts as the "not specified" sentinel and is replaced by the row count.
		// NOTE(review): because of this, a caller cannot request a range that ends at row 0.
		if( lastRow == 0 )
		lastRow = this->getRows();
		auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
		return valuesView[ offset ] * inVectorView[ column ];
		};
		auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
		outVectorView[ row ] = value;
		// NOTE(review): the previous outVectorView[ row ] is read and multiplied even when
		// outVectorMultiplicator is zero; if RealType is an IEEE floating-point type, a NaN
		// or Inf already stored in outVector would propagate into the result.
		outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ];
		};
		this->segments.segmentsReduction( 0, this->getRows(), fetch, std::plus<>{}, keeper, ( RealType ) 0.0 );
		this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 );
		}

		template< typename Real,