Commit 30b25f63 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Added scalar multiplicator parameter to vectorProduct in Ellpack

TODO: it should be added to other formats as well
parent cf4a141e
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -153,7 +153,8 @@ public:
   template< typename InVector,
             typename OutVector >
   void vectorProduct( const InVector& inVector,
                       OutVector& outVector ) const;
                       OutVector& outVector,
                       RealType multiplicator = 1.0 ) const;

   template< typename Real2, typename Index2 >
   void addMatrix( const Ellpack< Real2, Device, Index2 >& matrix,
+11 −7
Original line number Diff line number Diff line
@@ -518,9 +518,10 @@ template< typename Real,
   template< typename InVector,
             typename OutVector >
void Ellpack< Real, Device, Index >::vectorProduct( const InVector& inVector,
                                                                   OutVector& outVector ) const
                                                    OutVector& outVector,
                                                    RealType multiplicator ) const
{
   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
   DeviceDependentCode::vectorProduct( *this, inVector, outVector, multiplicator );
}

template< typename Real,
@@ -815,13 +816,14 @@ class EllpackDeviceDependentCode< Devices::Host >
                typename OutVector >
      static void vectorProduct( const Ellpack< Real, Device, Index >& matrix,
                                 const InVector& inVector,
                                 OutVector& outVector )
                                 OutVector& outVector,
                                 Real multiplicator )
      {
#ifdef HAVE_OPENMP
#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
#endif
         for( Index row = 0; row < matrix.getRows(); row ++ )
            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
            outVector[ row ] = matrix.rowVectorProduct( row, inVector ) * multiplicator;
         /*Index col;
         for( Index row = 0; row < matrix.getRows(); row ++ )
         {
@@ -848,6 +850,7 @@ __global__ void EllpackVectorProductCudaKernel(
   const Real* values,
   const Real* inVector,
   Real* outVector,
   Real multiplicator,
   const Index gridIdx )
{
   const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
@@ -864,7 +867,7 @@ __global__ void EllpackVectorProductCudaKernel(
      result += values[ i ] * inVector[ columnIndex ];
      i += alignedRows;
   }
   outVector[ rowIdx ] = result;
   outVector[ rowIdx ] = result * multiplicator;
}
#endif

@@ -909,7 +912,8 @@ class EllpackDeviceDependentCode< Devices::Cuda >
                typename OutVector >
      static void vectorProduct( const Ellpack< Real, Device, Index >& matrix,
                                 const InVector& inVector,
                                 OutVector& outVector )
                                 OutVector& outVector,
                                 Real multiplicator )
      {
         //MatrixVectorProductCuda( matrix, inVector, outVector );
         #ifdef HAVE_CUDA
@@ -937,6 +941,7 @@ class EllpackDeviceDependentCode< Devices::Cuda >
                  matrix.values.getData(),
                  inVector.getData(),
                  outVector.getData(),
                  multiplicator,
                  gridIdx );
               TNL_CHECK_CUDA_DEVICE;
            }
@@ -946,7 +951,6 @@ class EllpackDeviceDependentCode< Devices::Cuda >
            TNL_CHECK_CUDA_DEVICE;
            cudaDeviceSynchronize();
         #endif
 
      }
};