From edb2644aca85e22250f98c2504a42ed10effb8ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Thu, 28 Jan 2021 21:22:18 +0100 Subject: [PATCH 1/3] Optimizing initiation of Adaptive CSR kernel. --- .../Algorithms/Segments/CSRKernelAdaptive.h | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h index feed58a587..7c36b0df84 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h +++ b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h @@ -373,10 +373,11 @@ struct CSRKernelAdaptive Index &sum ) { sum = 0; + TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType > + hostOffsets( offsets ); for (Index current = start; current < size - 1; current++ ) { - Index elements = offsets.getElement(current + 1) - - offsets.getElement(current); + Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ]; sum += elements; if( sum > SHARED_PER_WARP ) { @@ -407,8 +408,8 @@ struct CSRKernelAdaptive Index sum, start( 0 ), nextStart( 0 ); // Fill blocks - std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlock; - inBlock.reserve( rows ); + std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks; + inBlocks.reserve( rows ); while( nextStart != rows - 1 ) { @@ -417,35 +418,30 @@ struct CSRKernelAdaptive if( type == details::Type::LONG ) { - const Index blocksCount = inBlock.size(); + const Index blocksCount = inBlocks.size(); const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize(); Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; if( warpsLeft == 0 ) warpsLeft = warpsPerCudaBlock; //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP); - inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft ); + inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft ); for( Index index = 1; index < warpsLeft; index++ ) { - inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft ); + inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft ); } } else { - inBlock.emplace_back(start, type, + inBlocks.emplace_back(start, type, nextStart, offsets.getElement(nextStart), offsets.getElement(start) ); } start = nextStart; } - inBlock.emplace_back(nextStart); - - // Copy values - this->blocks.setSize(inBlock.size()); - for (size_t i = 0; i < inBlock.size(); ++i) - this->blocks.setElement(i, inBlock[i]); - - this->view.setBlocks( blocks ); + inBlocks.emplace_back(nextStart); + this->blocks = inBlocks; + this->view.setBlocks( blocks ); }; void reset() -- GitLab From 01f4ab27ee1ee680ca4ee6d8b8fb8823287a424d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Thu, 28 Jan 2021 21:23:06 +0100 Subject: [PATCH 2/3] Added CSR kernel type getter. --- .../Algorithms/Segments/CSRKernelAdaptive.h | 9 +++ src/TNL/Algorithms/Segments/CSRKernelHybrid.h | 1 + .../Algorithms/Segments/CSRKernelHybrid.hpp | 9 +++ src/TNL/Algorithms/Segments/CSRKernelScalar.h | 2 + .../Algorithms/Segments/CSRKernelScalar.hpp | 9 +++ src/TNL/Algorithms/Segments/CSRKernelVector.h | 59 ++++++++++--------- .../Algorithms/Segments/CSRKernelVector.hpp | 8 +++ src/TNL/Algorithms/Segments/CSRView.hpp | 2 +- 8 files changed, 69 insertions(+), 30 deletions(-) diff --git a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h index 7c36b0df84..0b71c2b917 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h +++ b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h @@ -220,6 +220,11 @@ struct CSRKernelAdaptiveView ConstViewType getConstView() const { return *this; }; + static TNL::String getKernelType() + { + return "Adaptive"; + }; + template< typename OffsetsView, typename Fetch, typename Reduction, @@ -344,6 +349,10 @@ struct CSRKernelAdaptive using BlocksType = typename ViewType::BlocksType; using BlocksView = typename BlocksType::ViewType; + static TNL::String getKernelType() + { + return ViewType::getKernelType(); + }; static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256; diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h index 92a4a54ee6..c24c9fa108 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h +++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h @@ -38,6 +38,7 @@ struct CSRKernelHybrid ConstViewType getConstView() const; + static TNL::String getKernelType(); template< typename OffsetsView, typename Fetch, diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp index 06d2d2868d..c559169946 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp +++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp @@ -114,6 +114,15 @@ getView() -> ViewType return *this; } +template< typename Index, + typename Device > +TNL::String +CSRKernelHybrid< Index, Device >:: +getKernelType() +{ + return "Hybrid"; +} + template< typename Index, typename Device > auto diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.h b/src/TNL/Algorithms/Segments/CSRKernelScalar.h index 4a716c890d..1de467a399 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelScalar.h +++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.h @@ -38,6 +38,8 @@ struct CSRKernelScalar ConstViewType getConstView() const; + static TNL::String getKernelType(); + template< typename OffsetsView, typename Fetch, typename Reduction, diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp index 7dd0f5cd7b..b5a396e159 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp +++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp @@ -56,6 +56,15 @@ getConstView() const -> ConstViewType return *this; }; +template< typename Index, + typename Device > +TNL::String +CSRKernelScalar< Index, Device >:: +getKernelType() +{ + return "Scalar"; +} + template< typename Index, typename Device > template< typename OffsetsView, diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.h b/src/TNL/Algorithms/Segments/CSRKernelVector.h index 7a6ccf7ff7..a5eb772108 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelVector.h +++ b/src/TNL/Algorithms/Segments/CSRKernelVector.h @@ -24,35 +24,36 @@ template< typename Index, typename Device > struct CSRKernelVector { - using IndexType = Index; - using DeviceType = Device; - using ViewType = CSRKernelVector< Index, Device >; - using ConstViewType = CSRKernelVector< Index, Device >; - - template< typename Offsets > - void init( const Offsets& offsets ); - - void reset(); - - ViewType getView(); - - ConstViewType getConstView() const; - - - template< typename OffsetsView, - typename Fetch, - typename Reduction, - typename ResultKeeper, - typename Real, - typename... Args > - static void segmentsReduction( const OffsetsView& offsets, - Index first, - Index last, - Fetch& fetch, - const Reduction& reduction, - ResultKeeper& keeper, - const Real& zero, - Args... args ); + using IndexType = Index; + using DeviceType = Device; + using ViewType = CSRKernelVector< Index, Device >; + using ConstViewType = CSRKernelVector< Index, Device >; + + template< typename Offsets > + void init( const Offsets& offsets ); + + void reset(); + + ViewType getView(); + + ConstViewType getConstView() const; + + static TNL::String getKernelType(); + + template< typename OffsetsView, + typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real, + typename... Args > + static void segmentsReduction( const OffsetsView& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduction& reduction, + ResultKeeper& keeper, + const Real& zero, + Args... args ); }; } // namespace Segments diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp index d6f5bb7ec3..faa0308648 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp +++ b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp @@ -111,6 +111,14 @@ getConstView() const -> ConstViewType return *this; }; +template< typename Index, + typename Device > +TNL::String +CSRKernelVector< Index, Device >:: +getKernelType() +{ + return "Vector"; +} template< typename Index, typename Device > diff --git a/src/TNL/Algorithms/Segments/CSRView.hpp b/src/TNL/Algorithms/Segments/CSRView.hpp index 045b6bc5a2..8b1dce064e 100644 --- a/src/TNL/Algorithms/Segments/CSRView.hpp +++ b/src/TNL/Algorithms/Segments/CSRView.hpp @@ -91,7 +91,7 @@ String CSRView< Device, Index, Kernel >:: getSegmentsType() { - return "CSR"; + return "CSR< " + KernelType::getKernelType() + " >"; } template< typename Device, -- GitLab From 92a039dcb9796d8e863b9927266be6778c6635ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Thu, 28 Jan 2021 21:24:14 +0100 Subject: [PATCH 3/3] Added new CSR kernels to SpMV postprocessing Python script. --- .../scripts/tnl-spmv-benchmark-make-tables.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py index 2af4b9ffc6..8899dc9ebb 100755 --- a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py +++ b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py @@ -8,24 +8,25 @@ import pandas from collections import defaultdict from TNL.LogParser import LogParser -""" +""" Sparse matrix formats as they appear in the log file. """ -cpu_matrix_formats = [ 'CSR', +cpu_matrix_formats = [ 'CSR', 'Ellpack', 'Ellpack Legacy', 'SlicedEllpack', 'SlicedEllpack Legacy', 'ChunkedEllpack', 'ChunkedEllpack Legacy', 'BiEllpack', 'BiEllpack Legacy' ] gpu_matrix_formats = [ 'CSR Legacy Scalar', 'CSR Legacy Vector', 'CSR Legacy MultiVector', - 'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic', + 'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic', 'CSR Legacy Adaptive', + 'CSR< Scalar >', 'CSR< Vector >', 'CSR< Hybrid >', 'CSR< Adaptive >', 'Ellpack', 'Ellpack Legacy', 'SlicedEllpack', 'SlicedEllpack Legacy', 'ChunkedEllpack', 'ChunkedEllpack Legacy', 'BiEllpack', 'BiEllpack Legacy' ] """ -CPU formats to be compared +CPU formats to be compared """ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar', 'Ellpack' : 'Ellpack Legacy', @@ -34,9 +35,12 @@ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar', } """ -GPU formats to be compared +GPU formats to be compared """ -gpu_comparison_formats = { #'CSR' : 'CSR Legacy Scalar', +gpu_comparison_formats = { 'CSR< Scalar >' : 'CSR Legacy Scalar', + 'CSR< Vector >' : 'CSR Legacy Vector', + 'CSR< Hybrid >' : 'CSR Legacy LightWithoutAtomic', + 'CSR< Adaptive >' : 'CSR Legacy Adaptive', 'Ellpack' : 'Ellpack Legacy', 'SlicedEllpack' : 'SlicedEllpack Legacy', 'BiEllpack' : 'BiEllpack Legacy' -- GitLab