Commit af358d68 authored by Tomáš Oberhuber
Browse files

Adding kernel type parameter to CSR segments.

parent 4160e723
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ namespace TNL {

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_ = CSRScalar,
          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
class CSR
{
@@ -36,6 +37,7 @@ class CSR
      using ViewType = CSRView< Device, Index >;
      using ConstViewType = CSRView< Device, std::add_const_t< IndexType > >;
      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
      CSRKernelTypes KernelType = KernelType_;

      CSR();

@@ -114,8 +116,8 @@ class CSR

      CSR& operator=( const CSR& rhsSegments ) = default;

      template< typename Device_, typename Index_, typename IndexAllocator_ >
      CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source );
      template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ >
      CSR& operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source );

      void save( File& file ) const;

+51 −28
Original line number Diff line number Diff line
@@ -22,16 +22,18 @@ namespace TNL {

// Default constructor of the CSR segments container. The body is empty and
// there is no member-initializer list.
// NOTE(review): this is a diff listing - the class-qualifier line appears twice,
// once without and once with the newly added KernelType_ argument.
template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
CSR()
{
}

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
CSR( const SegmentsSizes& segmentsSizes )
{
   this->setSegmentsSizes( segmentsSizes );
@@ -39,16 +41,18 @@ CSR( const SegmentsSizes& segmentsSizes )

// Copy constructor: initializes this object's `offsets` from `csr.offsets`.
// No other member is named in the initializer list and the body is empty.
// NOTE(review): this is a diff listing - the class-qualifier line appears twice,
// once without and once with the newly added KernelType_ argument.
template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
CSR( const CSR& csr ) : offsets( csr.offsets )
{
}

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
{

@@ -56,9 +60,10 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
String
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
getSerializationType()
{
   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
@@ -66,9 +71,10 @@ getSerializationType()

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
String
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
getSegmentsType()
{
   return ViewType::getSegmentsType();
@@ -76,10 +82,11 @@ getSegmentsType()

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
   template< typename SizesHolder >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
setSegmentsSizes( const SizesHolder& sizes )
{
   details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets );
@@ -87,9 +94,10 @@ setSegmentsSizes( const SizesHolder& sizes )

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
reset()
{
   this->offsets.setSize( 1 );
@@ -99,9 +107,10 @@ reset()

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
typename CSR< Device, Index, IndexAllocator >::ViewType
CSR< Device, Index, IndexAllocator >::
typename CSR< Device, Index, KernelType_, IndexAllocator >::ViewType
CSR< Device, Index, KernelType_, IndexAllocator >::
getView()
{
   return ViewType( this->offsets.getView() );
@@ -109,9 +118,10 @@ getView()

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
auto
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
getConstView() const -> const ConstViewType
{
   return ConstViewType( this->offsets.getConstView() );
@@ -119,8 +129,9 @@ getConstView() const -> const ConstViewType

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >::
getSegmentsCount() const -> IndexType
{
   return this->offsets.getSize() - 1;
@@ -128,8 +139,9 @@ getSegmentsCount() const -> IndexType

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >::
getSegmentSize( const IndexType segmentIdx ) const -> IndexType
{
   return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
@@ -137,8 +149,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >::
getSize() const -> IndexType
{
   return this->getStorageSize();
@@ -146,8 +159,9 @@ getSize() const -> IndexType

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >::
getStorageSize() const -> IndexType
{
   return details::CSR< Device, Index >::getStorageSize( this->offsets );
@@ -155,8 +169,9 @@ getStorageSize() const -> IndexType

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >::
getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType
{
   if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -172,10 +187,11 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
__cuda_callable__
auto
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
{
   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
@@ -183,10 +199,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
   template< typename Function, typename... Args >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
{
   this->getConstView().forSegments( first, last, f, args... );
@@ -194,10 +211,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator>
   template< typename Function, typename... Args >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
forAll( Function& f, Args... args ) const
{
   this->forSegments( 0, this->getSegmentsCount(), f, args... );
@@ -205,10 +223,11 @@ forAll( Function& f, Args... args ) const

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... );
@@ -216,10 +235,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
@@ -227,11 +247,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
   template< typename Device_, typename Index_, typename IndexAllocator_ >
CSR< Device, Index, IndexAllocator >&
CSR< Device, Index, IndexAllocator >::
operator=( const CSR< Device_, Index_, IndexAllocator_ >& source )
   template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ >
CSR< Device, Index, KernelType_, IndexAllocator >&
CSR< Device, Index, KernelType_, IndexAllocator >::
operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source )
{
   this->offsets = source.offsets;
   return *this;
@@ -239,9 +260,10 @@ operator=( const CSR< Device_, Index_, IndexAllocator_ >& source )

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
save( File& file ) const
{
   file << this->offsets;
@@ -249,9 +271,10 @@ save( File& file ) const

template< typename Device,
          typename Index,
          CSRKernelTypes KernelType_,
          typename IndexAllocator >
void
CSR< Device, Index, IndexAllocator >::
CSR< Device, Index, KernelType_, IndexAllocator >::
load( File& file )
{
   file >> this->offsets;
+6 −2
Original line number Diff line number Diff line
@@ -19,8 +19,11 @@ namespace TNL {
   namespace Algorithms {
      namespace Segments {

/// Kernel variants selectable for CSR segments through the `KernelType_`
/// template parameter of CSR and CSRView.
enum CSRKernelTypes {
   CSRScalarKernel,
   CSRVectorKernel,
   CSRLightKernel,
   // `CSRScalar` is the spelling used by the default template arguments of CSR
   // and CSRView and by the comparison in CSRView::segmentsReduction. Without
   // this alias that name is not an enumerator of this enum. The values of the
   // three existing enumerators are unchanged.
   CSRScalar = CSRScalarKernel
};

template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ = CSRScalar >
class CSRView
{
   public:
@@ -34,6 +37,7 @@ class CSRView
      using ViewTemplate = CSRView< Device_, Index_ >;
      using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
      CSRKernelTypes KernelType = KernelType_;

      __cuda_callable__
      CSRView();
+81 −56
Original line number Diff line number Diff line
@@ -22,122 +22,136 @@ namespace TNL {


// Default constructor of the CSR segments view. Marked __cuda_callable__;
// the body is empty and there is no member-initializer list.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
CSRView()
{
}

// Constructs the view from an existing offsets view: `offsets` is initialized
// from `offsets_view`. Marked __cuda_callable__; the body is empty.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
CSRView( const OffsetsView& offsets_view )
   : offsets( offsets_view )
{
}

// Overload taking the offsets view by const rvalue reference; `offsets` is
// initialized from `offsets_view`. Marked __cuda_callable__; the body is empty.
// NOTE(review): the parameter is const and is passed on as a named (lvalue)
// expression without std::move, so no move from it can take place here.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
CSRView( const OffsetsView&& offsets_view )
   : offsets( offsets_view )
{
}

// Copy constructor: `offsets` is initialized from `csr_view.offsets`.
// Marked __cuda_callable__; the body is empty.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
CSRView( const CSRView& csr_view )
   : offsets( csr_view.offsets )
{
}

// Constructor taking another view by const rvalue reference; `offsets` is
// initialized from std::move( csr_view.offsets ). Marked __cuda_callable__.
// NOTE(review): csr_view is const, so std::move yields a const rvalue here;
// presumably this ends up copying the offsets view rather than moving it -
// confirm against the OffsetsView constructors.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
CSRView( const CSRView&& csr_view )
   : offsets( std::move( csr_view.offsets ) )
{
}

// Returns the serialization type string of the form
// "CSR< [any_device], <serialization type of IndexType> >".
// The device is written as the literal "[any_device]" and KernelType_ does not
// appear in the string.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
String
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
getSerializationType()
{
   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
}

// Returns the name of this segments format, the constant string "CSR".
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
String
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
getSegmentsType()
{
   return "CSR";
}

// Returns a ViewType constructed from this view's `offsets`.
// Marked __cuda_callable__.
// NOTE(review): this is a diff listing - the template-header tail, the return
// type line and the class-qualifier line each appear twice (before/after
// adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
typename CSRView< Device, Index >::ViewType
CSRView< Device, Index >::
typename CSRView< Device, Index, KernelType_ >::ViewType
CSRView< Device, Index, KernelType_ >::
getView()
{
   return ViewType( this->offsets );
}

// Returns a ConstViewType constructed from the const view of `offsets`
// (obtained via offsets.getConstView()). Marked __cuda_callable__.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
auto
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
getConstView() const -> const ConstViewType
{
   return ConstViewType( this->offsets.getConstView() );
}

// Returns the number of segments, computed as the size of `offsets` minus one
// (i.e. `offsets` stores one more entry than there are segments).
// NOTE(review): this is a diff listing - the template-header tail and the
// `__cuda_callable__ auto CSRView<...>::` line each appear twice (before/after
// adding KernelType_).
template< typename Device,
          typename Index >
__cuda_callable__ auto CSRView< Device, Index >::
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__ auto CSRView< Device, Index, KernelType_ >::
getSegmentsCount() const -> IndexType
{
   return this->offsets.getSize() - 1;
}

// Returns the size of segment `segmentIdx`. The computation is delegated to
// details::CSR< Device, Index >::getSegmentSize, which receives `offsets` and
// the segment index.
// NOTE(review): this is a diff listing - the template-header tail and the
// `__cuda_callable__ auto CSRView<...>::` line each appear twice (before/after
// adding KernelType_).
template< typename Device,
          typename Index >
__cuda_callable__ auto CSRView< Device, Index >::
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__ auto CSRView< Device, Index, KernelType_ >::
getSegmentSize( const IndexType segmentIdx ) const -> IndexType
{
   return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
}

// Returns the same value as getStorageSize(); this method simply forwards to it.
// NOTE(review): this is a diff listing - the template-header tail and the
// `__cuda_callable__ auto CSRView<...>::` line each appear twice (before/after
// adding KernelType_).
template< typename Device,
          typename Index >
__cuda_callable__ auto CSRView< Device, Index >::
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__ auto CSRView< Device, Index, KernelType_ >::
getSize() const -> IndexType
{
   return this->getStorageSize();
}

// Returns the storage size of the segments. The computation is delegated to
// details::CSR< Device, Index >::getStorageSize, which receives `offsets`.
// NOTE(review): this is a diff listing - the template-header tail and the
// `__cuda_callable__ auto CSRView<...>::` line each appear twice (before/after
// adding KernelType_).
template< typename Device,
          typename Index >
__cuda_callable__ auto CSRView< Device, Index >::
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__ auto CSRView< Device, Index, KernelType_ >::
getStorageSize() const -> IndexType
{
   return details::CSR< Device, Index >::getStorageSize( this->offsets );
}

template< typename Device,
          typename Index >
__cuda_callable__ auto CSRView< Device, Index >::
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__ auto CSRView< Device, Index, KernelType_ >::
getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType
{
   if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -152,20 +166,22 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
}

// Returns a SegmentViewType for segment `segmentIdx`, constructed from:
//   - offsets[ segmentIdx ]                              (where the segment begins),
//   - offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]  (difference of consecutive offsets),
//   - the literal 1.
// NOTE(review): the meaning of the trailing 1 is not visible here;
// CSR::getSegmentView in this listing passes only the first two arguments -
// confirm against the SegmentView constructor.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
__cuda_callable__
auto
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
{
   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
}

template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
   template< typename Function, typename... Args >
void
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
{
   const auto offsetsView = this->offsets;
@@ -181,24 +197,28 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
}

// Applies `f` over all segments: forwards to forSegments with the range
// [ 0, getSegmentsCount() ) and passes `args...` through unchanged.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
   template< typename Function, typename... Args >
void
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
forAll( Function& f, Args... args ) const
{
   this->forSegments( 0, this->getSegmentsCount(), f, args... );
}

template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
void
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType;
   const auto offsetsView = this->offsets.getConstView();
   if( KernelType == CSRScalar )
   {
      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
         const IndexType begin = offsetsView[ segmentIdx ];
         const IndexType end = offsetsView[ segmentIdx + 1 ];
@@ -211,21 +231,24 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio
      };
   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
   }
}

// Runs the reduction over all segments: forwards to segmentsReduction with the
// range [ 0, getSegmentsCount() ) and passes fetch, reduction, keeper, zero and
// `args...` through unchanged.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
void
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
}

template< typename Device,
          typename Index >
CSRView< Device, Index >&
CSRView< Device, Index >::
          typename Index,
          CSRKernelTypes KernelType_ >
CSRView< Device, Index, KernelType_ >&
CSRView< Device, Index, KernelType_ >::
operator=( const CSRView& view )
{
   this->offsets.bind( view.offsets );
@@ -233,18 +256,20 @@ operator=( const CSRView& view )
}

// Writes this view to `file`: only `offsets` is streamed out.
// NOTE(review): this is a diff listing - the template-header tail and the
// class-qualifier line each appear twice (before/after adding KernelType_).
template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
void
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
save( File& file ) const
{
   file << this->offsets;
}

template< typename Device,
          typename Index >
          typename Index,
          CSRKernelTypes KernelType_ >
void
CSRView< Device, Index >::
CSRView< Device, Index, KernelType_ >::
load( File& file )
{
   file >> this->offsets;