Commit 5f98eede authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Deleted DenseMatrix[View]::getRowVectorProduct and added rows range to vectorProduct method.

parent 344b5e07
Loading
Loading
Loading
Loading
+17 −25
Original line number Diff line number Diff line
@@ -331,9 +331,10 @@ class DenseMatrix : public Matrix< Real, Device, Index >
      /**
       * \brief Sets element at given \e row and \e column to given \e value.
       * 
       * This method can be called only from the host system (CPU) no matter
       * where the matrix is allocated. If the matrix is allocated in GPU device
       * this methods transfer values of each matrix element separately and so the
       * This method can be called from the host system (CPU) no matter
       * where the matrix is allocated. If the matrix is allocated on GPU this method
       * can be called even from device kernels. If the matrix is allocated on a GPU device
       * and this method is called from CPU, it transfers values of each matrix element separately and so the
       * performance is very low. For higher performance see \ref DenseMatrix::getRow
       * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
       * 
@@ -354,9 +355,10 @@ class DenseMatrix : public Matrix< Real, Device, Index >
      /**
       * \brief Add element at given \e row and \e column to given \e value.
       * 
       * This method can be called only from the host system (CPU) no matter
       * where the matrix is allocated. If the matrix is allocated in GPU device
       * this methods transfer values of each matrix element separately and so the
       * This method can be called from the host system (CPU) no matter
       * where the matrix is allocated. If the matrix is allocated on GPU this method
       * can be called even from device kernels. If the matrix is allocated on a GPU device
       * and this method is called from CPU, it transfers values of each matrix element separately and so the
       * performance is very low. For higher performance see \ref DenseMatrix::getRow
       * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
       * 
@@ -375,9 +377,10 @@ class DenseMatrix : public Matrix< Real, Device, Index >
      /**
       * \brief Returns value of matrix element at position given by its row and column index.
       * 
       * This method can be called only from the host system (CPU) no matter
       * where the matrix is allocated. If the matrix is allocated in GPU device
       * this methods transfer values of each matrix element separately and so the
       * This method can be called from the host system (CPU) no matter
       * where the matrix is allocated. If the matrix is allocated on GPU this method
       * can be called even from device kernels. If the matrix is allocated on a GPU device
       * and this method is called from CPU, it transfers values of each matrix element separately and so the
       * performance is very low. For higher performance see \ref DenseMatrix::getRow
       * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
       * 
@@ -518,21 +521,6 @@ class DenseMatrix : public Matrix< Real, Device, Index >
      template< typename Function >
      void forAllRows( Function& function );

      /**
       * \brief This method computes scalar product of given vector and one 
       *  row of the matrix.
       * 
       * \tparam Vector is type of input vector. It can be \ref Vector,
       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
       * \param row is index of the row used for the scalar product.
       * \param vector is the input vector.
       * \return result of the matrix row and vector product.
       */
      template< typename Vector >
      __cuda_callable__
      typename Vector::RealType rowVectorProduct( const IndexType row,
                                                  const Vector& vector ) const;

      /**
       * \brief Computes product of matrix and vector.
       * 
@@ -546,7 +534,11 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       */
      template< typename InVector, typename OutVector >
      void vectorProduct( const InVector& inVector,
                          OutVector& outVector ) const;
                          OutVector& outVector,
                          const RealType& matrixMultiplicator = 1.0,
                          const RealType& outVectorMultiplicator = 0.0,
                          const IndexType firstRow = 0,
                          const IndexType lastRow = 0 ) const;

      template< typename Matrix >
      void addMatrix( const Matrix& matrix,
+7 −16
Original line number Diff line number Diff line
@@ -416,20 +416,6 @@ forAllRows( Function& function )
   this->forRows( 0, this->getRows(), function );
}

/**
 * \brief Computes the product of one matrix row and a vector by forwarding
 *        the call to the underlying matrix view.
 *
 * This is a thin wrapper: all work is done by \e view.rowVectorProduct.
 *
 * \tparam Vector is the type of the input vector; it must define \e RealType,
 *         which is used as the return type.
 * \param row is the index of the matrix row used for the product.
 * \param vector is the input vector.
 * \return the value returned by \e view.rowVectorProduct( row, vector ).
 */
template< typename Real,
          typename Device,
          typename Index,
          bool RowMajorOrder,
          typename RealAllocator >
   template< typename Vector >
__cuda_callable__
typename Vector::RealType 
DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::
rowVectorProduct( const IndexType row, const Vector& vector ) const
{
   // Delegate to the view; the matrix itself holds no separate implementation.
   return this->view.rowVectorProduct( row, vector );
}

template< typename Real,
          typename Device,
          typename Index,
@@ -439,9 +425,14 @@ template< typename Real,
             typename OutVector >
void
DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::
vectorProduct( const InVector& inVector, OutVector& outVector ) const
vectorProduct( const InVector& inVector,
               OutVector& outVector,
               const RealType& matrixMultiplicator,
               const RealType& outVectorMultiplicator,
               const IndexType firstRow,
               const IndexType lastRow ) const
{
   this->view.vectorProduct( inVector, outVector );
   this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow );
}

template< typename Real,
+5 −6
Original line number Diff line number Diff line
@@ -141,14 +141,13 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
      template< typename Function >
      void forAllRows( Function& function );

      template< typename Vector >
      __cuda_callable__
      typename Vector::RealType rowVectorProduct( const IndexType row,
                                                  const Vector& vector ) const;

      template< typename InVector, typename OutVector >
      void vectorProduct( const InVector& inVector,
                          OutVector& outVector ) const;
                          OutVector& outVector,
                          const RealType& matrixMultiplicator = 1.0,
                          const RealType& outVectorMultiplicator = 0.0,
                          const IndexType firstRow = 0,
                          IndexType lastRow = 0 ) const;

      template< typename Matrix >
      void addMatrix( const Matrix& matrix,
+10 −20
Original line number Diff line number Diff line
@@ -367,23 +367,6 @@ forAllRows( Function& function )
   this->forRows( 0, this->getRows(), function );
}

/**
 * \brief Intended to compute the product of one matrix row and a vector.
 *
 * NOTE: the accumulation loop is currently commented out (see the TODO in the
 * body), so this method ignores both arguments and always returns zero.
 *
 * \tparam Vector is the type of the input vector; it must define \e RealType,
 *         which is used as the return type.
 * \param row is the index of the matrix row (currently unused).
 * \param vector is the input vector (currently unused).
 * \return zero, converted from \e RealType to \e Vector::RealType.
 */
template< typename Real,
          typename Device,
          typename Index,
          bool RowMajorOrder >
   template< typename Vector >
__cuda_callable__
typename Vector::RealType
DenseMatrixView< Real, Device, Index, RowMajorOrder >::
rowVectorProduct( const IndexType row, const Vector& vector ) const
{
   // Accumulator for the row-vector product; never updated while the loop below is disabled.
   RealType sum( 0.0 );
   // TODO: Fix this
   // The loop below is the disabled implementation; until it is restored the result is always zero.
   //for( IndexType column = 0; column < this->getColumns(); column++ )
   //   sum += this->getElementFast( row, column ) * vector[ column ];
   return sum;
}

template< typename Real,
          typename Device,
          typename Index,
@@ -392,7 +375,12 @@ template< typename Real,
             typename OutVector >
void
DenseMatrixView< Real, Device, Index, RowMajorOrder >::
vectorProduct( const InVector& inVector, OutVector& outVector ) const
vectorProduct( const InVector& inVector,
               OutVector& outVector,
               const RealType& matrixMultiplicator,
               const RealType& outVectorMultiplicator,
               const IndexType firstRow,
               IndexType lastRow ) const
{
   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." );
   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." );
@@ -400,13 +388,15 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const
   const auto inVectorView = inVector.getConstView();
   auto outVectorView = outVector.getView();
   const auto valuesView = this->values.getConstView();
   if( lastRow == 0 )
      lastRow = this->getRows();
   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
      return valuesView[ offset ] * inVectorView[ column ];
   };
   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
      outVectorView[ row ] = value;
      outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ];
   };
   this->segments.segmentsReduction( 0, this->getRows(), fetch, std::plus<>{}, keeper, ( RealType ) 0.0 );
   this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 );
}

template< typename Real,