From 91806e1de466b3c06aea63811c9dcb6672341079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 9 Feb 2021 20:16:45 +0100
Subject: [PATCH] Moving implementation of CSRAdaptiveKernel to .hpp file.

---
 .../Algorithms/Segments/CSRAdaptiveKernel.h   |  94 ++----------
 .../Algorithms/Segments/CSRAdaptiveKernel.hpp | 143 +++++++++++++++++-
 2 files changed, 152 insertions(+), 85 deletions(-)

diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
index 899e22ff9a..6b64f1a851 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
@@ -63,10 +63,7 @@ struct CSRAdaptiveKernel
    using BlocksType = typename ViewType::BlocksType;
    using BlocksView = typename BlocksType::ViewType;
 
-   static TNL::String getKernelType()
-   {
-      return ViewType::getKernelType();
-   };
+   static TNL::String getKernelType();
 
     static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
 
@@ -93,84 +90,16 @@ struct CSRAdaptiveKernel
                     const Offsets& offsets,
                     const Index size,
                     details::Type &type,
-                    Index &sum )
-   {
-      sum = 0;
-      for (Index current = start; current < size - 1; current++ )
-      {
-         Index elements = offsets[ current + 1 ] - offsets[ current ];
-         sum += elements;
-         if( sum > SHARED_PER_WARP )
-         {
-            if( current - start > 0 ) // extra row
-            {
-               type = details::Type::STREAM;
-               return current;
-            }
-            else
-            {                  // one long row
-               if( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT )
-                  type = details::Type::VECTOR;
-               else
-                  type = details::Type::LONG;
-               return current + 1;
-            }
-         }
-      }
-      type = details::Type::STREAM;
-      return size - 1; // return last row pointer
-    }
+                    Index &sum );
 
    template< typename Offsets >
-   void init( const Offsets& offsets )
-   {
-      using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >;
-      HostOffsetsType hostOffsets( offsets );
-      const Index rows = offsets.getSize();
-      Index sum, start( 0 ), nextStart( 0 );
-
-      // Fill blocks
-      std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
-      inBlocks.reserve( rows );
-
-      while( nextStart != rows - 1 )
-      {
-         details::Type type;
-         nextStart = findLimit( start, hostOffsets, rows, type, sum );
-
-         if( type == details::Type::LONG )
-         {
-            const Index blocksCount = inBlocks.size();
-            const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
-            Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
-            if( warpsLeft == 0 )
-               warpsLeft = warpsPerCudaBlock;
-            for( Index index = 0; index < warpsLeft; index++ )
-               inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
-         }
-         else
-         {
-            inBlocks.emplace_back(start, type,
-                  nextStart,
-                  offsets.getElement(nextStart),
-                  offsets.getElement(start) );
-         }
-         start = nextStart;
-      }
-      inBlocks.emplace_back(nextStart);
-      this->blocks = inBlocks;
-      this->view.setBlocks( blocks );
-   }
-
-   void reset()
-   {
-      this->blocks.reset();
-      this->view.setBlocks( blocks );
-   }
-
-   ViewType getView() { return this->view; };
-
-   ConstViewType getConstView() const { return this->view; };
+   void init( const Offsets& offsets );
+
+   void reset();
+
+   ViewType getView();
+
+   ConstViewType getConstView() const;
 
    template< typename OffsetsView,
               typename Fetch,
@@ -185,10 +114,7 @@ struct CSRAdaptiveKernel
                         const Reduction& reduction,
                         ResultKeeper& keeper,
                         const Real& zero,
-                        Args... args ) const
-   {
-      view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
-   }
+                        Args... args ) const;
 
    protected:
       BlocksType blocks;
diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
index b795a52f57..4c53a83ca3 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
@@ -22,7 +22,148 @@ namespace TNL {
    namespace Algorithms {
       namespace Segments {
 
+// Returns the kernel type string reported by ViewType::getKernelType().
+template< typename Index, typename Device >
+TNL::String
+CSRAdaptiveKernel< Index, Device >::
+getKernelType()
+{
+   return ViewType::getKernelType();
+}
+
+
+// Starting at row `start`, accumulates per-row element counts into `sum`
+// until the total exceeds SHARED_PER_WARP. Returns the row index at which
+// the scan stopped and reports the block category through `type`: STREAM
+// when at least one row fit (or the end was reached), otherwise VECTOR or
+// LONG for a single row, decided by 2 * MAX_ELEMENTS_PER_WARP_ADAPT.
+template< typename Index, typename Device >
+   template< typename Offsets >
+Index
+CSRAdaptiveKernel< Index, Device >::
+findLimit( const Index start,
+           const Offsets& offsets,
+           const Index size,
+           details::Type &type,
+           Index &sum )
+{
+   sum = 0;
+   for( Index row = start; row < size - 1; row++ )
+   {
+      const Index elements = offsets[ row + 1 ] - offsets[ row ];
+      sum += elements;
+      if( sum <= SHARED_PER_WARP )
+         continue; // the row still fits, keep accumulating
+      if( row > start )
+      {
+         // this row pushed the total over the limit; stop in front of it
+         type = details::Type::STREAM;
+         return row;
+      }
+      // the very first row exceeds the limit on its own
+      const bool fitsVector = sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT;
+      type = fitsVector ? details::Type::VECTOR : details::Type::LONG;
+      return row + 1;
+   }
+   type = details::Type::STREAM;
+   return size - 1; // return last row pointer
+}
+
+// Builds the block descriptors for the adaptive kernel from segment offsets.
+// The offsets are copied into a host vector once and every look-up below
+// reads that copy instead of calling getElement() on the original vector.
+template< typename Index, typename Device >
+   template< typename Offsets >
+void
+CSRAdaptiveKernel< Index, Device >::
+init( const Offsets& offsets )
+{
+   using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >;
+   HostOffsetsType hostOffsets( offsets );
+   const Index rows = offsets.getSize();
+   Index sum, start( 0 ), nextStart( 0 );
+
+   // Fill blocks
+   std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
+   inBlocks.reserve( rows );
+
+   // findLimit never returns more than rows - 1, so `<` matches `!=` for valid
+   // input and (for signed Index) also skips the loop when offsets is empty.
+   while( nextStart < rows - 1 )
+   {
+      details::Type type;
+      nextStart = findLimit( start, hostOffsets, rows, type, sum );
+      if( type == details::Type::LONG )
+      {
+         // one LONG descriptor per slot left up to the next multiple of warpsPerCudaBlock
+         const Index blocksCount = inBlocks.size();
+         const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
+         Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
+         if( warpsLeft == 0 )
+            warpsLeft = warpsPerCudaBlock;
+         for( Index index = 0; index < warpsLeft; index++ )
+            inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
+      }
+      else
+         inBlocks.emplace_back( start, type, nextStart,
+                                hostOffsets.getElement( nextStart ), hostOffsets.getElement( start ) );
+      start = nextStart;
+   }
+   inBlocks.emplace_back( nextStart ); // trailing descriptor built from the final row index only
+   this->blocks = inBlocks;
+   this->view.setBlocks( blocks );
+}
+
+// Calls reset() on the block container and passes it to the view again.
+template< typename Index, typename Device >
+void
+CSRAdaptiveKernel< Index, Device >::
+reset()
+{
+   blocks.reset();
+   view.setBlocks( blocks );
+}
+
+// Returns the stored view by value.
+template< typename Index, typename Device >
+typename CSRAdaptiveKernel< Index, Device >::ViewType
+CSRAdaptiveKernel< Index, Device >::
+getView()
+{
+   return view;
+}
+
+// Returns a ConstViewType obtained from the stored view.
+template< typename Index, typename Device >
+auto
+CSRAdaptiveKernel< Index, Device >::
+getConstView() const -> ConstViewType
+{
+   return this->view;
+}
+
+// Segment-wise reduction entry point of the kernel object. No work is done
+// here: the call is delegated to the stored view's segmentsReduction with
+// every argument (including the variadic pack) passed through in the same
+// order.
+template< typename Index, typename Device >
+   template< typename OffsetsView, typename Fetch,
+             typename Reduction, typename ResultKeeper,
+             typename Real, typename... Args >
+void
+CSRAdaptiveKernel< Index, Device >::
+segmentsReduction( const OffsetsView& offsets,
+                   Index first,
+                   Index last,
+                   Fetch& fetch,
+                   const Reduction& reduction,
+                   ResultKeeper& keeper,
+                   const Real& zero,
+                   Args... args ) const
+{
+   this->view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
+}
 
       } // namespace Segments
    }  // namespace Algorithms
-} // namespace TNL
\ No newline at end of file
+} // namespace TNL
-- 
GitLab