Moving implementation of CSRAdaptiveKernel to .hpp file.

91806e1d · Tomáš Oberhuber · 37632fd9 · 91806e1d · 91806e1d
Commit 91806e1d authored 4 years ago by Tomáš Oberhuber
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
@@ -63,10 +63,7 @@ struct CSRAdaptiveKernel
   using BlocksType = typename ViewType::BlocksType;
   using BlocksView = typename BlocksType::ViewType;
-   static TNL::String getKernelType()
+   static TNL::String getKernelType();
-   {
-      return ViewType::getKernelType();
-   };
    static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
@@ -93,84 +90,16 @@ struct CSRAdaptiveKernel
                    const Offsets& offsets,
                    const Index size,
                    details::Type &type,
-                    Index &sum )
+                    Index &sum );
-   {
-      sum = 0;
-      for (Index current = start; current < size - 1; current++ )
-      {
-         Index elements = offsets[ current + 1 ] - offsets[ current ];
-         sum += elements;
-         if( sum > SHARED_PER_WARP )
-         {
-            if( current - start > 0 ) // extra row
-            {
-               type = details::Type::STREAM;
-               return current;
-            }
-            else
-            {                  // one long row
-               if( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT )
-                  type = details::Type::VECTOR;
-               else
-                  type = details::Type::LONG;
-               return current + 1;
-            }
-         }
-      }
-      type = details::Type::STREAM;
-      return size - 1; // return last row pointer
-    }
   template< typename Offsets >
-   void init( const Offsets& offsets )
+   void init( const Offsets& offsets );
-   {
-      using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >;
+   void reset();
-      HostOffsetsType hostOffsets( offsets );
-      const Index rows = offsets.getSize();
+   ViewType getView();
-      Index sum, start( 0 ), nextStart( 0 );
+   ConstViewType getConstView() const;
-      // Fill blocks
-      std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
-      inBlocks.reserve( rows );
-      while( nextStart != rows - 1 )
-      {
-         details::Type type;
-         nextStart = findLimit( start, hostOffsets, rows, type, sum );
-         if( type == details::Type::LONG )
-         {
-            const Index blocksCount = inBlocks.size();
-            const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
-            Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
-            if( warpsLeft == 0 )
-               warpsLeft = warpsPerCudaBlock;
-            for( Index index = 0; index < warpsLeft; index++ )
-               inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
-         }
-         else
-         {
-            inBlocks.emplace_back(start, type,
-                  nextStart,
-                  offsets.getElement(nextStart),
-                  offsets.getElement(start) );
-         }
-         start = nextStart;
-      }
-      inBlocks.emplace_back(nextStart);
-      this->blocks = inBlocks;
-      this->view.setBlocks( blocks );
-   }
-   void reset()
-   {
-      this->blocks.reset();
-      this->view.setBlocks( blocks );
-   }
-   ViewType getView() { return this->view; };
-   ConstViewType getConstView() const { return this->view; };
   template< typename OffsetsView,
              typename Fetch,
@@ -185,10 +114,7 @@ struct CSRAdaptiveKernel
                        const Reduction& reduction,
                        ResultKeeper& keeper,
                        const Real& zero,
-                        Args... args ) const
+                        Args... args ) const;
-   {
-      view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
-   }
   protected:
      BlocksType blocks;

--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
@@ -22,7 +22,148 @@ namespace TNL {
   namespace Algorithms {
      namespace Segments {
+// Returns the kernel type name. The value is taken unchanged from the
+// static ViewType::getKernelType().
+template< typename Index,
+          typename Device >
+TNL::String
+CSRAdaptiveKernel< Index, Device >::
+getKernelType()
+{
+   return ViewType::getKernelType();
+}
+// Scans consecutive rows beginning at `start`, adding each row length
+// ( offsets[ row + 1 ] - offsets[ row ] ) to `sum`, and stops at the first
+// row where `sum` exceeds SHARED_PER_WARP.
+//   start   - first row to scan
+//   offsets - row offsets, read through operator[]
+//   size    - scan bound; rows are visited while row < size - 1
+//   type    - output: STREAM, VECTOR or LONG, always assigned before returning
+//   sum     - output: zeroed on entry, then the accumulated row lengths
+// Returns the row index at which the next scan should begin.
+template< typename Index,
+          typename Device >
+   template< typename Offsets >
+Index
+CSRAdaptiveKernel< Index, Device >::
+findLimit( const Index start,
+           const Offsets& offsets,
+           const Index size,
+           details::Type &type,
+           Index &sum )
+{
+   sum = 0;
+   for( Index row = start; row < size - 1; row++ )
+   {
+      const Index rowLength = offsets[ row + 1 ] - offsets[ row ];
+      sum += rowLength;
+      if( sum <= SHARED_PER_WARP )
+         continue; // limit not exceeded yet, keep adding rows
+      if( row > start )
+      {
+         // Earlier rows were already gathered and this extra row no longer
+         // fits: close the group right before it.
+         type = details::Type::STREAM;
+         return row;
+      }
+      // The first scanned row alone exceeds the limit: one long row.
+      type = ( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT ) ? details::Type::VECTOR
+                                                        : details::Type::LONG;
+      return row + 1;
+   }
+   // The limit was never exceeded for the remaining rows.
+   type = details::Type::STREAM;
+   return size - 1; // return last row pointer
+}
+// Builds the list of block descriptors from the segment offsets, stores it
+// in `blocks` and passes it to the view.
+//   offsets - segment offsets; copied once into a host vector, and all
+//             element reads below go through that host copy.
+// Empty `offsets` leaves the kernel in the same state as reset().
+template< typename Index,
+          typename Device >
+   template< typename Offsets >
+void
+CSRAdaptiveKernel< Index, Device >::
+init( const Offsets& offsets )
+{
+   using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >;
+   HostOffsetsType hostOffsets( offsets );
+   const Index rows = offsets.getSize();
+   if( rows == 0 )
+   {
+      // With no offsets, `rows - 1` can never be matched by nextStart in the
+      // loop below and the offsets would be indexed out of bounds.
+      this->reset();
+      return;
+   }
+   Index sum, start( 0 ), nextStart( 0 );
+   // Fill blocks
+   std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
+   inBlocks.reserve( rows );
+   while( nextStart != rows - 1 )
+   {
+      details::Type type; // always assigned by findLimit
+      nextStart = findLimit( start, hostOffsets, rows, type, sum );
+      if( type == details::Type::LONG )
+      {
+         // Emit enough LONG descriptors for this row to round the number of
+         // blocks up to a multiple of warpsPerCudaBlock; when the count is
+         // already a multiple, emit a full warpsPerCudaBlock of them.
+         const Index blocksCount = inBlocks.size();
+         const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
+         Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
+         if( warpsLeft == 0 )
+            warpsLeft = warpsPerCudaBlock;
+         for( Index index = 0; index < warpsLeft; index++ )
+            inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
+      }
+      else
+      {
+         // Read the bounds from the host copy rather than querying the
+         // original `offsets` container element by element.
+         inBlocks.emplace_back( start, type,
+               nextStart,
+               hostOffsets[ nextStart ],
+               hostOffsets[ start ] );
+      }
+      start = nextStart;
+   }
+   // Closing descriptor that only carries the final row index.
+   inBlocks.emplace_back( nextStart );
+   this->blocks = inBlocks;
+   this->view.setBlocks( blocks );
+}
+// Clears the stored block descriptors and hands the now-reset `blocks`
+// container to the view again.
+template< typename Index,
+          typename Device >
+void
+CSRAdaptiveKernel< Index, Device >::
+reset()
+{
+   blocks.reset();
+   view.setBlocks( blocks );
+}
+// Returns the stored `view` member by value as ViewType.
+template< typename Index,
+          typename Device >
+typename CSRAdaptiveKernel< Index, Device >::ViewType
+CSRAdaptiveKernel< Index, Device >::
+getView()
+{
+   return view;
+}
+// Returns the stored `view` member by value as ConstViewType.
+template< typename Index,
+          typename Device >
+auto
+CSRAdaptiveKernel< Index, Device >::
+getConstView() const -> ConstViewType
+{
+   return this->view;
+}
+// Forwards the reduction request to the view: every argument is passed on
+// unchanged, in the same order, to view.segmentsReduction(). Nothing is
+// computed here and nothing is returned.
+template< typename Index,
+          typename Device >
+   template< typename OffsetsView,
+             typename Fetch,
+             typename Reduction,
+             typename ResultKeeper,
+             typename Real,
+             typename... Args >
+void
+CSRAdaptiveKernel< Index, Device >::
+segmentsReduction( const OffsetsView& offsets,
+                   Index first,
+                   Index last,
+                   Fetch& fetch,
+                   const Reduction& reduction,
+                   ResultKeeper& keeper,
+                   const Real& zero,
+                   Args... args ) const
+{
+   this->view.segmentsReduction( offsets,
+                                 first,
+                                 last,
+                                 fetch,
+                                 reduction,
+                                 keeper,
+                                 zero,
+                                 args... );
+}
      } // namespace Segments
   }  // namespace Algorithms
 } // namespace TNL
\ No newline at end of file