Loading src/TNL/Matrices/DenseMatrix.h +17 −25 Original line number Diff line number Diff line Loading @@ -331,9 +331,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Sets element at given \e row and \e column to given \e value. * * This method can be called only from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated in GPU device * this methods transfer values of each matrix element separately and so the * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * Loading @@ -354,9 +355,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Add element at given \e row and \e column to given \e value. * * This method can be called only from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated in GPU device * this methods transfer values of each matrix element separately and so the * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * Loading @@ -375,9 +377,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Returns value of matrix element at position given by its row and column index. * * This method can be called only from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated in GPU device * this methods transfer values of each matrix element separately and so the * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * Loading Loading @@ -518,21 +521,6 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); /** * \brief This method computes scalar product of given vector and one * row of the matrix. * * \tparam Vector is type of input vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. * \param row is index of the row used for the scalar product. * \param vector is the input vector. * \return result of the matrix row and vector product. */ template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; /** * \brief Computes product of matrix and vector. * Loading @@ -546,7 +534,11 @@ class DenseMatrix : public Matrix< Real, Device, Index > */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, const IndexType firstRow = 0, const IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, Loading src/TNL/Matrices/DenseMatrix.hpp +7 −16 Original line number Diff line number Diff line Loading @@ -416,20 +416,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Vector > __cuda_callable__ typename Vector::RealType DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { return this->view.rowVectorProduct( row, vector ); } template< typename Real, typename Device, typename Index, Loading @@ -439,9 +425,14 @@ template< typename Real, typename OutVector > void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, const IndexType firstRow, const IndexType lastRow ) const { this->view.vectorProduct( inVector, outVector ); this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow ); } template< typename Real, Loading src/TNL/Matrices/DenseMatrixView.h +5 −6 Original line number Diff line number Diff line Loading @@ -141,14 +141,13 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, const IndexType firstRow = 0, IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, Loading src/TNL/Matrices/DenseMatrixView.hpp +10 −20 Original line number Diff line number Diff line Loading @@ -367,23 +367,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder > template< typename Vector > __cuda_callable__ typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { RealType sum( 0.0 ); // TODO: Fix this //for( IndexType column = 0; column < this->getColumns(); column++ ) // sum += this->getElementFast( row, column ) * vector[ column ]; return sum; } template< typename Real, typename Device, typename Index, Loading @@ -392,7 +375,12 @@ template< typename Real, typename OutVector > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, const IndexType firstRow, IndexType lastRow ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); Loading @@ -400,13 +388,15 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); if( lastRow == 0 ) lastRow = this->getRows(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]; }; this->segments.segmentsReduction( 0, this->getRows(), fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); } template< typename Real, Loading Loading
src/TNL/Matrices/DenseMatrix.h +17 −25 Original line number Diff line number Diff line Loading @@ -331,9 +331,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Sets element at given \e row and \e column to given \e value. * * This method can be called only from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated in GPU device * this methods transfer values of each matrix element separately and so the * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * Loading @@ -354,9 +355,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Add element at given \e row and \e column to given \e value. * * This method can be called only from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated in GPU device * this methods transfer values of each matrix element separately and so the * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * Loading @@ -375,9 +377,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Returns value of matrix element at position given by its row and column index. * * This method can be called only from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated in GPU device * this methods transfer values of each matrix element separately and so the * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * Loading Loading @@ -518,21 +521,6 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); /** * \brief This method computes scalar product of given vector and one * row of the matrix. * * \tparam Vector is type of input vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. * \param row is index of the row used for the scalar product. * \param vector is the input vector. * \return result of the matrix row and vector product. */ template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; /** * \brief Computes product of matrix and vector. * Loading @@ -546,7 +534,11 @@ class DenseMatrix : public Matrix< Real, Device, Index > */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, const IndexType firstRow = 0, const IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, Loading
src/TNL/Matrices/DenseMatrix.hpp +7 −16 Original line number Diff line number Diff line Loading @@ -416,20 +416,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Vector > __cuda_callable__ typename Vector::RealType DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { return this->view.rowVectorProduct( row, vector ); } template< typename Real, typename Device, typename Index, Loading @@ -439,9 +425,14 @@ template< typename Real, typename OutVector > void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, const IndexType firstRow, const IndexType lastRow ) const { this->view.vectorProduct( inVector, outVector ); this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow ); } template< typename Real, Loading
src/TNL/Matrices/DenseMatrixView.h +5 −6 Original line number Diff line number Diff line Loading @@ -141,14 +141,13 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, const IndexType firstRow = 0, IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, Loading
src/TNL/Matrices/DenseMatrixView.hpp +10 −20 Original line number Diff line number Diff line Loading @@ -367,23 +367,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder > template< typename Vector > __cuda_callable__ typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { RealType sum( 0.0 ); // TODO: Fix this //for( IndexType column = 0; column < this->getColumns(); column++ ) // sum += this->getElementFast( row, column ) * vector[ column ]; return sum; } template< typename Real, typename Device, typename Index, Loading @@ -392,7 +375,12 @@ template< typename Real, typename OutVector > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, const IndexType firstRow, IndexType lastRow ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); Loading @@ -400,13 +388,15 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); if( lastRow == 0 ) lastRow = this->getRows(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]; }; this->segments.segmentsReduction( 0, this->getRows(), fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); } template< typename Real, Loading