Commit b32d83f0 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Added forSegments to all segments and forRows (just in SparseMatrix for now).

parent 361d8421
Loading
Loading
Loading
Loading
+109 −98
Original line number Diff line number Diff line
@@ -15,9 +15,12 @@
#include <TNL/Algorithms/Segments/BiEllpackView.h>
#include <TNL/Algorithms/Segments/SegmentView.h>

namespace TNL {
   namespace Algorithms {
      namespace Segments {
namespace TNL
{
   namespace Algorithms
   {
      namespace Segments
      {

         template <typename Device,
                   typename Index,
@@ -27,16 +30,15 @@ template< typename Device,
         class BiEllpack
         {
         public:

            using DeviceType = Device;
            using IndexType = std::remove_const_t<Index>;
      using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >;
            using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocator>;
            static constexpr ElementsOrganization getOrganization() { return Organization; }
      using ViewType = BiEllpackView< Device, Index, Organization >;
            using ViewType = BiEllpackView< Device, Index, Organization, WarpSize >;
            template <typename Device_, typename Index_>
      using ViewTemplate = BiEllpackView< Device_, Index_, Organization >;
      using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, Organization >;
      using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >;
            using ViewTemplate = BiEllpackView<Device_, Index_, Organization, WarpSize >;
            using ConstViewType = typename ViewType::ConstViewType;
            using SegmentViewType = typename ViewType::SegmentViewType;

            static constexpr bool havePadding() { return true; };

@@ -60,7 +62,8 @@ class BiEllpack
       * \brief Number of segments.
       */
            __cuda_callable__
      IndexType getSegmentsCount() const;
                IndexType
                getSegmentsCount() const;

            /**
       * \brief Set sizes of particular segments.
@@ -76,16 +79,20 @@ class BiEllpack
       * \brief Number segments.
       */
            __cuda_callable__
      IndexType getSize() const;
                IndexType
                getSize() const;

            __cuda_callable__
      IndexType getStorageSize() const;
                IndexType
                getStorageSize() const;

            __cuda_callable__
      IndexType getGlobalIndex( const IndexType segmentIdx, const IndexType localIdx ) const;
                IndexType
                getGlobalIndex(const IndexType segmentIdx, const IndexType localIdx) const;

            __cuda_callable__
      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
                SegmentViewType
                getSegmentView(const IndexType segmentIdx) const;

            /***
       * \brief Go over all segments and for each segment element call
@@ -93,12 +100,17 @@ class BiEllpack
       * When it is true, the for-loop continues. Once 'f' returns false, the for-loop
       * is terminated.
       */
      template< typename Function, typename... Args >
      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
            template <typename Function>
            void forElements(IndexType first, IndexType last, Function &&f) const;

            template <typename Function>
            void forEachElement(Function &&f) const;

      template< typename Function, typename... Args >
      void forEachElement( Function& f, Args... args ) const;
            template <typename Function>
            void forSegments(IndexType begin, IndexType end, Function &&f) const;

            template <typename Function>
            void forEachSegment(Function &&f) const;

            /***
       * \brief Go over all segments and perform a reduction in each of them.
@@ -129,7 +141,6 @@ class BiEllpack
            void computeColumnSizes(const SizesHolder &segmentsSizes);

         protected:

            static constexpr int getWarpSize() { return WarpSize; };

            /**
             * \brief Base-2 logarithm of WarpSize, rounded down.
             *
             * Computed with integer shifts instead of std::log2, because
             * std::log2 is not a constexpr function in standard C++ and the
             * floating-point round trip is unnecessary for an integer result.
             * For every positive WarpSize the value equals the truncated
             * std::log2( WarpSize ); for WarpSize <= 1 the result is 0.
             *
             * \return floor( log2( WarpSize ) ) as an int.
             */
            static constexpr int getLogWarpSize()
            {
               int logarithm = 0;
               // Each halving that still leaves a value above 1 adds one to the logarithm.
               for( int remaining = WarpSize; remaining > 1; remaining >>= 1 )
                  ++logarithm;
               return logarithm;
            }
+33 −6
Original line number Diff line number Diff line
@@ -443,12 +443,12 @@ template< typename Device,
          typename IndexAllocator,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function, typename... Args >
   template< typename Function >
void
BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
forElements( IndexType first, IndexType last, Function& f, Args... args ) const
forElements( IndexType first, IndexType last, Function&& f ) const
{
   this->getConstView().forElements( first, last, f, args... );
   this->getConstView().forElements( first, last, f );
}

template< typename Device,
@@ -456,14 +456,41 @@ template< typename Device,
          typename IndexAllocator,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function, typename... Args >
   template< typename Function >
void
BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
forEachElement( Function& f, Args... args ) const
forEachElement( Function&& f ) const
{
   this->forElements( 0, this->getSegmentsCount(), f, args... );
   this->forElements( 0, this->getSegmentsCount(), f );
}

/**
 * \brief Segment-wise traversal delegated to the constant view.
 *
 * Obtains the constant view of these segments and passes \e begin, \e end
 * and \e f on to its forSegments method unchanged.
 *
 * \param begin first bound of the segment index range handed to the view.
 * \param end second bound of the segment index range handed to the view.
 * \param f callable handed to the view.
 */
template< typename Device,
          typename Index,
          typename IndexAllocator,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function >
void
BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
forSegments( IndexType begin, IndexType end, Function&& f ) const
{
   const auto constView = this->getConstView();
   constView.forSegments( begin, end, f );
}

/**
 * \brief Traversal of all segments delegated to the constant view.
 *
 * Obtains the constant view of these segments and passes \e f on to its
 * forEachSegment method unchanged.
 *
 * \param f callable handed to the view.
 */
template< typename Device,
          typename Index,
          typename IndexAllocator,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function >
void
BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
forEachSegment( Function&& f ) const
{
   const auto constView = this->getConstView();
   constView.forEachSegment( f );
}


template< typename Device,
          typename Index,
          typename IndexAllocator,
+12 −5
Original line number Diff line number Diff line
@@ -44,10 +44,11 @@ class BiEllpackSegmentView
       * \param groupsWidth is a static vector containing widths of the strip groups
       */
      __cuda_callable__
      BiEllpackSegmentView( const IndexType offset,
      BiEllpackSegmentView( const IndexType segmentIdx,
                            const IndexType offset,
                            const IndexType inStripIdx,
                            const GroupsWidthType& groupsWidth )
      : groupOffset( offset ), inStripIdx( inStripIdx ), segmentSize( TNL::sum( groupsWidth ) ), groupsWidth( groupsWidth ){};
      : segmentIdx( segmentIdx ), groupOffset( offset ), inStripIdx( inStripIdx ), segmentSize( TNL::sum( groupsWidth ) ), groupsWidth( groupsWidth ){};

      __cuda_callable__
      IndexType getSize() const
@@ -79,9 +80,15 @@ class BiEllpackSegmentView
            return offset + inStripIdx + localIdx * groupHeight;
      };

      /**
       * \brief Gives access to the segment index stored in this view.
       *
       * \return constant reference to the \e segmentIdx member.
       */
      __cuda_callable__
      const IndexType& getSegmentIndex() const
      {
         return segmentIdx;
      }

      protected:

         IndexType groupOffset, inStripIdx, segmentSize;
         IndexType segmentIdx, groupOffset, inStripIdx, segmentSize;

         GroupsWidthType groupsWidth;
};
+13 −8
Original line number Diff line number Diff line
@@ -32,13 +32,13 @@ class BiEllpackView

      using DeviceType = Device;
      using IndexType = std::remove_const_t< Index >;
      using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >;
      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >;
      using ConstOffsetsView = typename OffsetsView::ConstViewType;
      using ViewType = BiEllpackView;
      template< typename Device_, typename Index_ >
      using ViewTemplate = BiEllpackView< Device_, Index_ >;
      using ConstViewType = BiEllpackView< Device, std::add_const_t< Index > >;
      using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >;
      using ViewTemplate = BiEllpackView< Device_, Index_, Organization, WarpSize >;
      using ConstViewType = BiEllpackView< Device, std::add_const_t< Index >, Organization, WarpSize >;
      using SegmentViewType = BiEllpackSegmentView< IndexType, Organization, WarpSize >;

      static constexpr bool havePadding() { return true; };

@@ -111,12 +111,17 @@ class BiEllpackView
       * When it is true, the for-loop continues. Once 'f' returns false, the for-loop
       * is terminated.
       */
      template< typename Function, typename... Args >
      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
      template< typename Function >
      void forElements( IndexType first, IndexType last, Function&& f ) const;

      template< typename Function, typename... Args >
      void forEachElement( Function& f, Args... args ) const;
      template< typename Function >
      void forEachElement( Function&& f ) const;

      template< typename Function >
      void forSegments( IndexType begin, IndexType end, Function&& f ) const;

      template< typename Function >
      void forEachSegment( Function&& f ) const;

      /***
       * \brief Go over all segments and perform a reduction in each of them.
+42 −8
Original line number Diff line number Diff line
@@ -129,7 +129,12 @@ template< typename Device,
__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >::
getConstView() const -> const ConstViewType
{
   return ConstViewType( size, storageSize, virtualRows, rowPermArray.getConstView(), groupPointers.getConstView() );
   BiEllpackView* this_ptr = const_cast< BiEllpackView* >( this );
   return ConstViewType( size,
                         storageSize,
                         virtualRows,
                         this_ptr->rowPermArray.getView(),
                         this_ptr->groupPointers.getView() );
}

template< typename Device,
@@ -255,14 +260,14 @@ template< typename Device,
          typename Index,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function, typename... Args >
   template< typename Function >
void
BiEllpackView< Device, Index, Organization, WarpSize >::
forElements( IndexType first, IndexType last, Function& f, Args... args ) const
forElements( IndexType first, IndexType last, Function&& f ) const
{
   const auto segmentsPermutationView = this->rowPermArray.getConstView();
   const auto groupPointersView = this->groupPointers.getConstView();
   auto work = [=] __cuda_callable__ ( IndexType segmentIdx, Args... args ) mutable {
   auto work = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
      const IndexType strip = segmentIdx / getWarpSize();
      const IndexType firstGroupInStrip = strip * ( getLogWarpSize() + 1 );
      const IndexType rowStripPerm = segmentsPermutationView[ segmentIdx ] - strip * getWarpSize();
@@ -298,19 +303,48 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
         groupHeight /= 2;
      }
   };
   Algorithms::ParallelFor< DeviceType >::exec( first, last , work, args... );
   Algorithms::ParallelFor< DeviceType >::exec( first, last , work );
}

template< typename Device,
          typename Index,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function, typename... Args >
   template< typename Function >
void
BiEllpackView< Device, Index, Organization, WarpSize >::
forEachElement( Function& f, Args... args ) const
forEachElement( Function&& f ) const
{
   this->forElements( 0, this->getSegmentsCount(), f, args... );
   this->forElements( 0, this->getSegmentsCount(), f );
}

/**
 * \brief Calls \e function with a segment view for each processed segment index.
 *
 * A constant view of these segments is captured by value in a lambda. For each
 * segment index it receives, the lambda builds the segment view via
 * getSegmentView and passes it to \e function. The lambda is executed through
 * ParallelFor< DeviceType >::exec with \e begin and \e end as the range bounds.
 *
 * \param begin first range bound passed to ParallelFor.
 * \param end second range bound passed to ParallelFor.
 * \param function callable invoked with the segment view; it is captured by
 *        value in the lambda.
 */
template< typename Device,
          typename Index,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function >
void
BiEllpackView< Device, Index, Organization, WarpSize >::
forSegments( IndexType begin, IndexType end, Function&& function ) const
{
   auto segments = this->getConstView();
   auto visitSegment = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
      // Keep the segment view in a named variable so it is passed as an lvalue.
      auto segmentView = segments.getSegmentView( segmentIdx );
      function( segmentView );
   };
   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, visitSegment );
}

/**
 * \brief Runs forSegments over the range from 0 to getSegmentsCount().
 *
 * \param f callable passed on to forSegments.
 */
template< typename Device,
          typename Index,
          ElementsOrganization Organization,
          int WarpSize >
   template< typename Function >
void
BiEllpackView< Device, Index, Organization, WarpSize >::
forEachSegment( Function&& f ) const
{
   const IndexType segmentsCount = this->getSegmentsCount();
   this->forSegments( 0, segmentsCount, f );
}

template< typename Device,
Loading