Commit 5bae44a6 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Added computing of non-zero elements in matrix row for CUDA.

parent 550b90f0
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -41,7 +41,8 @@ private:

public:

   typedef Real RealType;
   using RealType = Real;
   //typedef Real RealType;
   typedef Device DeviceType;
   typedef Index IndexType;
   typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector;
@@ -51,7 +52,10 @@ public:
   typedef CSR< Real, Devices::Cuda, Index > CudaType;
   typedef Sparse< Real, Device, Index > BaseType;
   typedef typename BaseType::MatrixRow MatrixRow;
   typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
   
   using ConstMatrixRow = typename BaseType::ConstMatrixRow;
   //using typename BaseType::ConstMatrixRow;
   //typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;


   enum SPMVCudaKernel { scalar, vector, hybrid };
+38 −3
Original line number Diff line number Diff line
@@ -131,13 +131,38 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
   return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
}

// TODO: move to SparseRow
//
// Kernel that stores the number of non-zero elements of `row` into the
// location pointed to by `result`.
//
// Parameters:
//   row    - row object, received by value; it only needs to provide
//            getNonZeroElementsCount() callable from device code and a
//            nested IndexType.
//   result - address the count is written to. It is dereferenced inside the
//            kernel, so it has to be memory the device can write.
//
// Only the thread whose global index is 0 performs the store, so the result
// is written exactly once regardless of the launch configuration.
//
// The pointee type has its const qualifier stripped: a row type may declare a
// const-qualified IndexType (e.g. SparseRow< const Real, const Index >), and
// a pointer to such a type could not be assigned through.
template< typename MatrixRow >
__global__ void getNonZeroRowLengthCudaKernel( const MatrixRow row,
                                               typename std::remove_const< typename MatrixRow::IndexType >::type* result )
{
   const int threadId = blockIdx.x * blockDim.x + threadIdx.x;
   if( threadId == 0 )
   {
      // Write through the pointer (assigning to `result` itself would only
      // change the local copy of the pointer); `row` is an object, not a pointer.
      *result = row.getNonZeroElementsCount();
   }
}

// Returns the number of non-zero elements in the given matrix row.
//
// When DeviceType is Devices::Host the row is fetched with getRow() and
// counted directly. When DeviceType is Devices::Cuda the count is produced by
// getNonZeroRowLengthCudaKernel (launched with one block of one thread) and
// fetched back through Devices::Cuda::passFromDevice().
//
// row - index of the row to inspect.
template< typename Real,
          typename Device,
          typename Index >
Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
   if( std::is_same< DeviceType, Devices::Host >::value )
   {
      ConstMatrixRow matrixRow = this->getRow( row );
      return matrixRow.getNonZeroElementsCount();
   }
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
      ConstMatrixRow matrixRow = this->getRow( row );
      // Initialised so that the value handed to passToDevice() is well defined.
      IndexType resultHost( 0 );
      IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
      // The kernel expects the row object (not the row index) and the pointer
      // returned by passToDevice() itself (not the address of the host
      // variable that holds it).
      getNonZeroRowLengthCudaKernel<<< 1, 1 >>>( matrixRow, resultCuda );
      // NOTE(review): presumably passFromDevice() performs a blocking copy and
      // therefore waits for the kernel to finish - confirm, and consider
      // checking for launch errors here.
      resultHost = Devices::Cuda::passFromDevice( resultCuda );
      Devices::Cuda::freeFromDevice( resultCuda );
      return resultHost;
   }
   
    // getRow() was throwing segmentation faults.
    // FOR THIS TO WORK, I had to change getRow() from [ rowIndex ] to .getElement( rowIndex ).
    
@@ -159,6 +184,16 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con
//    return elementCount;
}

// Returns the number of non-zero elements in the given row by fetching the
// row with getRow() and asking it for getNonZeroElementsCount(). Unlike
// getNonZeroRowLength() there is no dispatch on DeviceType here.
//
// row - index of the row to inspect.
template< typename Real, typename Device, typename Index >
__cuda_callable__
Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const
{
   const ConstMatrixRow rowView = this->getRow( row );
   return rowView.getNonZeroElementsCount();
}

template< typename Real,
          typename Device,
          typename Index >
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ class Sparse : public Matrix< Real, Device, Index >
   typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector;
   typedef Matrix< Real, Device, Index > BaseType;
   typedef SparseRow< RealType, IndexType > MatrixRow;
   typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;

   Sparse();

+4 −1
Original line number Diff line number Diff line
@@ -21,6 +21,9 @@ namespace Matrices {
template< typename Real, typename Index >
class SparseRow
{
   using RealType = Real;
   using IndexType = Index;
   
   public:

      __cuda_callable__
@@ -53,7 +56,7 @@ class SparseRow
      Index getLength() const;
      
      __cuda_callable__
      Index getNonZeroElementsCount( TNL::String deviceType ) const;
      Index getNonZeroElementsCount() const;

      void print( std::ostream& str ) const;

+1 −1
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ template< typename Real, typename Index >
__cuda_callable__
Index
SparseRow< Real, Index >::
getNonZeroElementsCount( TNL::String deviceType ) const
getNonZeroElementsCount() const
{
    using NonConstIndex = typename std::remove_const< Index >::type;