From edb2644aca85e22250f98c2504a42ed10effb8ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 28 Jan 2021 21:22:18 +0100
Subject: [PATCH 1/3] Optimizing initialization of Adaptive CSR kernel.

---
 .../Algorithms/Segments/CSRKernelAdaptive.h   | 28 ++++++++-----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
index feed58a587..7c36b0df84 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
+++ b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
@@ -373,10 +373,11 @@ struct CSRKernelAdaptive
                     Index &sum )
    {
       sum = 0;
+      TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >
+         hostOffsets( offsets );
       for (Index current = start; current < size - 1; current++ )
       {
-         Index elements = offsets.getElement(current + 1) -
-                           offsets.getElement(current);
+         Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ];
          sum += elements;
          if( sum > SHARED_PER_WARP )
          {
@@ -407,8 +408,8 @@ struct CSRKernelAdaptive
         Index sum, start( 0 ), nextStart( 0 );
 
         // Fill blocks
-        std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlock;
-        inBlock.reserve( rows );
+        std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
+        inBlocks.reserve( rows );
 
         while( nextStart != rows - 1 )
         {
@@ -417,35 +418,30 @@ struct CSRKernelAdaptive
 
             if( type == details::Type::LONG )
             {
-               const Index blocksCount = inBlock.size();
+               const Index blocksCount = inBlocks.size();
                const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
                Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
                if( warpsLeft == 0 )
                   warpsLeft = warpsPerCudaBlock;
                //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP);
-               inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft );
+               inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft );
                for( Index index = 1; index < warpsLeft; index++ )
                {
-                  inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft );
+                  inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
                }
             }
             else
             {
-               inBlock.emplace_back(start, type,
+               inBlocks.emplace_back(start, type,
                     nextStart,
                     offsets.getElement(nextStart),
                     offsets.getElement(start) );
             }
             start = nextStart;
         }
-        inBlock.emplace_back(nextStart);
-
-        // Copy values
-        this->blocks.setSize(inBlock.size());
-        for (size_t i = 0; i < inBlock.size(); ++i)
-            this->blocks.setElement(i, inBlock[i]);
-
-         this->view.setBlocks( blocks );
+        inBlocks.emplace_back(nextStart);
+        this->blocks = inBlocks;
+        this->view.setBlocks( blocks );
     };
 
    void reset()
-- 
GitLab


From 01f4ab27ee1ee680ca4ee6d8b8fb8823287a424d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 28 Jan 2021 21:23:06 +0100
Subject: [PATCH 2/3] Added CSR kernel type getter.

---
 .../Algorithms/Segments/CSRKernelAdaptive.h   |  9 +++
 src/TNL/Algorithms/Segments/CSRKernelHybrid.h |  1 +
 .../Algorithms/Segments/CSRKernelHybrid.hpp   |  9 +++
 src/TNL/Algorithms/Segments/CSRKernelScalar.h |  2 +
 .../Algorithms/Segments/CSRKernelScalar.hpp   |  9 +++
 src/TNL/Algorithms/Segments/CSRKernelVector.h | 59 ++++++++++---------
 .../Algorithms/Segments/CSRKernelVector.hpp   |  8 +++
 src/TNL/Algorithms/Segments/CSRView.hpp       |  2 +-
 8 files changed, 69 insertions(+), 30 deletions(-)

diff --git a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
index 7c36b0df84..0b71c2b917 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
+++ b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
@@ -220,6 +220,11 @@ struct CSRKernelAdaptiveView
 
    ConstViewType getConstView() const { return *this; };
 
+   static TNL::String getKernelType()
+   {
+      return "Adaptive";
+   };
+
    template< typename OffsetsView,
              typename Fetch,
              typename Reduction,
@@ -344,6 +349,10 @@ struct CSRKernelAdaptive
     using BlocksType = typename ViewType::BlocksType;
     using BlocksView = typename BlocksType::ViewType;
 
+   static TNL::String getKernelType()
+   {
+      return ViewType::getKernelType();
+   };
 
     static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
 
diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h
index 92a4a54ee6..c24c9fa108 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h
+++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h
@@ -38,6 +38,7 @@ struct CSRKernelHybrid
 
    ConstViewType getConstView() const;
 
+   static TNL::String getKernelType();
 
    template< typename OffsetsView,
              typename Fetch,
diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp
index 06d2d2868d..c559169946 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp
+++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp
@@ -114,6 +114,15 @@ getView() -> ViewType
     return *this;
 }
 
+template< typename Index,
+          typename Device >
+TNL::String
+CSRKernelHybrid< Index, Device >::
+getKernelType()
+{
+    return "Hybrid";
+}
+
 template< typename Index,
           typename Device >
 auto
diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.h b/src/TNL/Algorithms/Segments/CSRKernelScalar.h
index 4a716c890d..1de467a399 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelScalar.h
+++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.h
@@ -38,6 +38,8 @@ struct CSRKernelScalar
 
     ConstViewType getConstView() const;
 
+    static TNL::String getKernelType();
+
     template< typename OffsetsView,
               typename Fetch,
               typename Reduction,
diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp
index 7dd0f5cd7b..b5a396e159 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp
+++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp
@@ -56,6 +56,15 @@ getConstView() const -> ConstViewType
     return *this;
 };
 
+template< typename Index,
+          typename Device >
+TNL::String
+CSRKernelScalar< Index, Device >::
+getKernelType()
+{
+    return "Scalar";
+}
+
 template< typename Index,
           typename Device >
     template< typename OffsetsView,
diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.h b/src/TNL/Algorithms/Segments/CSRKernelVector.h
index 7a6ccf7ff7..a5eb772108 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelVector.h
+++ b/src/TNL/Algorithms/Segments/CSRKernelVector.h
@@ -24,35 +24,36 @@ template< typename Index,
           typename Device >
 struct CSRKernelVector
 {
-    using IndexType = Index;
-    using DeviceType = Device;
-    using ViewType = CSRKernelVector< Index, Device >;
-    using ConstViewType = CSRKernelVector< Index, Device >;
-
-    template< typename Offsets >
-    void init( const Offsets& offsets );
-
-    void reset();
-
-    ViewType getView();
-
-    ConstViewType getConstView() const;
-
-
-    template< typename OffsetsView,
-              typename Fetch,
-              typename Reduction,
-              typename ResultKeeper,
-              typename Real,
-              typename... Args >
-    static void segmentsReduction( const OffsetsView& offsets,
-                                   Index first,
-                                   Index last,
-                                   Fetch& fetch,
-                                   const Reduction& reduction,
-                                   ResultKeeper& keeper,
-                                   const Real& zero,
-                                   Args... args );
+   using IndexType = Index;
+   using DeviceType = Device;
+   using ViewType = CSRKernelVector< Index, Device >;
+   using ConstViewType = CSRKernelVector< Index, Device >;
+
+   template< typename Offsets >
+   void init( const Offsets& offsets );
+
+   void reset();
+
+   ViewType getView();
+
+   ConstViewType getConstView() const;
+
+   static TNL::String getKernelType();
+
+   template< typename OffsetsView,
+             typename Fetch,
+             typename Reduction,
+             typename ResultKeeper,
+             typename Real,
+             typename... Args >
+   static void segmentsReduction( const OffsetsView& offsets,
+                                  Index first,
+                                  Index last,
+                                  Fetch& fetch,
+                                  const Reduction& reduction,
+                                  ResultKeeper& keeper,
+                                  const Real& zero,
+                                  Args... args );
 };
 
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp
index d6f5bb7ec3..faa0308648 100644
--- a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp
+++ b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp
@@ -111,6 +111,14 @@ getConstView() const -> ConstViewType
     return *this;
 };
 
+template< typename Index,
+          typename Device >
+TNL::String
+CSRKernelVector< Index, Device >::
+getKernelType()
+{
+    return "Vector";
+}
 
 template< typename Index,
           typename Device >
diff --git a/src/TNL/Algorithms/Segments/CSRView.hpp b/src/TNL/Algorithms/Segments/CSRView.hpp
index 045b6bc5a2..8b1dce064e 100644
--- a/src/TNL/Algorithms/Segments/CSRView.hpp
+++ b/src/TNL/Algorithms/Segments/CSRView.hpp
@@ -91,7 +91,7 @@ String
 CSRView< Device, Index, Kernel >::
 getSegmentsType()
 {
-   return "CSR";
+   return "CSR< " + KernelType::getKernelType() + " >";
 }
 
 template< typename Device,
-- 
GitLab


From 92a039dcb9796d8e863b9927266be6778c6635ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 28 Jan 2021 21:24:14 +0100
Subject: [PATCH 3/3] Added new CSR kernels to SpMV postprocessing Python
 script.

---
 .../scripts/tnl-spmv-benchmark-make-tables.py    | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py
index 2af4b9ffc6..8899dc9ebb 100755
--- a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py
+++ b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py
@@ -8,24 +8,25 @@ import pandas
 from collections import defaultdict
 from TNL.LogParser import LogParser
 
-""" 
+"""
 Sparse matrix formats as they appear in the log file.
 """
-cpu_matrix_formats = [ 'CSR', 
+cpu_matrix_formats = [ 'CSR',
                        'Ellpack', 'Ellpack Legacy',
                        'SlicedEllpack', 'SlicedEllpack Legacy',
                        'ChunkedEllpack', 'ChunkedEllpack Legacy',
                        'BiEllpack', 'BiEllpack Legacy' ]
 
 gpu_matrix_formats = [ 'CSR Legacy Scalar', 'CSR Legacy Vector', 'CSR Legacy MultiVector',
-                       'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic', 
+                       'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic',
                        'CSR Legacy Adaptive',
+                       'CSR< Scalar >', 'CSR< Vector >', 'CSR< Hybrid >', 'CSR< Adaptive >',
                        'Ellpack', 'Ellpack Legacy',
                        'SlicedEllpack', 'SlicedEllpack Legacy',
                        'ChunkedEllpack', 'ChunkedEllpack Legacy',
                        'BiEllpack', 'BiEllpack Legacy' ]
 """
-CPU formats to be compared 
+CPU formats to be compared
 """
 cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar',
                            'Ellpack' : 'Ellpack Legacy',
@@ -34,9 +35,12 @@ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar',
                           }
 
 """
-GPU formats to be compared 
+GPU formats to be compared
 """
-gpu_comparison_formats = { #'CSR' : 'CSR Legacy Scalar',
+gpu_comparison_formats = { 'CSR< Scalar >' : 'CSR Legacy Scalar',
+                           'CSR< Vector >' : 'CSR Legacy Vector',
+                           'CSR< Hybrid >' : 'CSR Legacy LightWithoutAtomic',
+                           'CSR< Adaptive >' : 'CSR Legacy Adaptive',
                            'Ellpack' : 'Ellpack Legacy',
                            'SlicedEllpack' : 'SlicedEllpack Legacy',
                            'BiEllpack' : 'BiEllpack Legacy'
-- 
GitLab