From 30b25f639627af6c9b5055a901a5fc599e2b46b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz> Date: Sat, 2 Mar 2019 14:57:52 +0100 Subject: [PATCH] Added scalar multiplicator parameter to vectorProduct in Ellpack TODO: it should be added to other formats as well --- src/TNL/Matrices/Ellpack.h | 3 ++- src/TNL/Matrices/Ellpack_impl.h | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h index 7d17ff07e8..e19010dafa 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Ellpack.h @@ -153,7 +153,8 @@ public: template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + RealType multiplicator = 1.0 ) const; template< typename Real2, typename Index2 > void addMatrix( const Ellpack< Real2, Device, Index2 >& matrix, diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index 5e757efb96..7c2a3b6f00 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -518,9 +518,10 @@ template< typename Real, template< typename InVector, typename OutVector > void Ellpack< Real, Device, Index >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const + OutVector& outVector, + RealType multiplicator ) const { - DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + DeviceDependentCode::vectorProduct( *this, inVector, outVector, multiplicator ); } template< typename Real, @@ -815,13 +816,14 @@ class EllpackDeviceDependentCode< Devices::Host > typename OutVector > static void vectorProduct( const Ellpack< Real, Device, Index >& matrix, const InVector& inVector, - OutVector& outVector ) + OutVector& outVector, + Real multiplicator ) { #ifdef HAVE_OPENMP #pragma omp parallel for if( Devices::Host::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + outVector[ row ] = matrix.rowVectorProduct( row, inVector ) * multiplicator; /*Index col; for( Index row = 0; row < matrix.getRows(); row ++ ) { @@ -848,6 +850,7 @@ __global__ void EllpackVectorProductCudaKernel( const Real* values, const Real* inVector, Real* outVector, + Real multiplicator, const Index gridIdx ) { const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; @@ -864,7 +867,7 @@ __global__ void EllpackVectorProductCudaKernel( result += values[ i ] * inVector[ columnIndex ]; i += alignedRows; } - outVector[ rowIdx ] = result; + outVector[ rowIdx ] = result * multiplicator; } #endif @@ -909,7 +912,8 @@ class EllpackDeviceDependentCode< Devices::Cuda > typename OutVector > static void vectorProduct( const Ellpack< Real, Device, Index >& matrix, const InVector& inVector, - OutVector& outVector ) + OutVector& outVector, + Real multiplicator ) { //MatrixVectorProductCuda( matrix, inVector, outVector ); #ifdef HAVE_CUDA @@ -937,6 +941,7 @@ class EllpackDeviceDependentCode< Devices::Cuda > matrix.values.getData(), inVector.getData(), outVector.getData(), + multiplicator, gridIdx ); TNL_CHECK_CUDA_DEVICE; } @@ -946,7 +951,6 @@ class EllpackDeviceDependentCode< Devices::Cuda > TNL_CHECK_CUDA_DEVICE; cudaDeviceSynchronize(); #endif - } }; -- GitLab