diff --git a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py index 2af4b9ffc65b06476054858228b4b7d19b68c48f..8899dc9ebbc1b14a74a304e578b63c38a3bea3d2 100755 --- a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py +++ b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py @@ -8,24 +8,25 @@ import pandas from collections import defaultdict from TNL.LogParser import LogParser -""" +""" Sparse matrix formats as they appear in the log file. """ -cpu_matrix_formats = [ 'CSR', +cpu_matrix_formats = [ 'CSR', 'Ellpack', 'Ellpack Legacy', 'SlicedEllpack', 'SlicedEllpack Legacy', 'ChunkedEllpack', 'ChunkedEllpack Legacy', 'BiEllpack', 'BiEllpack Legacy' ] gpu_matrix_formats = [ 'CSR Legacy Scalar', 'CSR Legacy Vector', 'CSR Legacy MultiVector', - 'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic', + 'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic', 'CSR Legacy Adaptive', + 'CSR< Scalar >', 'CSR< Vector >', 'CSR< Hybrid >', 'CSR< Adaptive >', 'Ellpack', 'Ellpack Legacy', 'SlicedEllpack', 'SlicedEllpack Legacy', 'ChunkedEllpack', 'ChunkedEllpack Legacy', 'BiEllpack', 'BiEllpack Legacy' ] """ -CPU formats to be compared +CPU formats to be compared """ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar', 'Ellpack' : 'Ellpack Legacy', @@ -34,9 +35,12 @@ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar', } """ -GPU formats to be compared +GPU formats to be compared """ -gpu_comparison_formats = { #'CSR' : 'CSR Legacy Scalar', +gpu_comparison_formats = { 'CSR< Scalar >' : 'CSR Legacy Scalar', + 'CSR< Vector >' : 'CSR Legacy Vector', + 'CSR< Hybrid >' : 'CSR Legacy LightWithoutAtomic', + 'CSR< Adaptive >' : 'CSR Legacy Adaptive', 'Ellpack' : 'Ellpack Legacy', 'SlicedEllpack' : 'SlicedEllpack Legacy', 'BiEllpack' : 'BiEllpack Legacy' diff --git a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h index feed58a58792c0ba01c4c78040758fd21b9f5eed..0b71c2b917871b72dd0299fd4c0298a129e4ea1d 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h +++ b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h @@ -220,6 +220,11 @@ struct CSRKernelAdaptiveView ConstViewType getConstView() const { return *this; }; + static TNL::String getKernelType() + { + return "Adaptive"; + }; + template< typename OffsetsView, typename Fetch, typename Reduction, @@ -344,6 +349,10 @@ struct CSRKernelAdaptive using BlocksType = typename ViewType::BlocksType; using BlocksView = typename BlocksType::ViewType; + static TNL::String getKernelType() + { + return ViewType::getKernelType(); + }; static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256; @@ -373,10 +382,11 @@ struct CSRKernelAdaptive Index &sum ) { sum = 0; + TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType > + hostOffsets( offsets ); for (Index current = start; current < size - 1; current++ ) { - Index elements = offsets.getElement(current + 1) - - offsets.getElement(current); + Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ]; sum += elements; if( sum > SHARED_PER_WARP ) { @@ -407,8 +417,8 @@ struct CSRKernelAdaptive Index sum, start( 0 ), nextStart( 0 ); // Fill blocks - std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlock; - inBlock.reserve( rows ); + std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks; + inBlocks.reserve( rows ); while( nextStart != rows - 1 ) { @@ -417,35 +427,30 @@ struct CSRKernelAdaptive if( type == details::Type::LONG ) { - const Index blocksCount = inBlock.size(); + const Index blocksCount = inBlocks.size(); const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize(); Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; if( warpsLeft == 0 ) warpsLeft = warpsPerCudaBlock; //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP); - inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft ); + inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft ); for( Index index = 1; index < warpsLeft; index++ ) { - inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft ); + inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft ); } } else { - inBlock.emplace_back(start, type, + inBlocks.emplace_back(start, type, nextStart, offsets.getElement(nextStart), offsets.getElement(start) ); } start = nextStart; } - inBlock.emplace_back(nextStart); - - // Copy values - this->blocks.setSize(inBlock.size()); - for (size_t i = 0; i < inBlock.size(); ++i) - this->blocks.setElement(i, inBlock[i]); - - this->view.setBlocks( blocks ); + inBlocks.emplace_back(nextStart); + this->blocks = inBlocks; + this->view.setBlocks( blocks ); }; void reset() diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h index 92a4a54ee692f620a8b9983a5b637c25cfd1a09d..c24c9fa10858d5e139009ca194643ed450bbc192 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h +++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h @@ -38,6 +38,7 @@ struct CSRKernelHybrid ConstViewType getConstView() const; + static TNL::String getKernelType(); template< typename OffsetsView, typename Fetch, diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp index 06d2d2868d04f0b8562a991b34fa91539cb43d03..c55916994613bc9d5e88e720d68ac4db3b898298 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp +++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp @@ -114,6 +114,15 @@ getView() -> ViewType return *this; } +template< typename Index, + typename Device > +TNL::String +CSRKernelHybrid< Index, Device >:: +getKernelType() +{ + return "Hybrid"; +} + template< typename Index, typename Device > auto diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.h b/src/TNL/Algorithms/Segments/CSRKernelScalar.h index 4a716c890d07b9fd797235540ee89dd85de86196..1de467a39987733ec0b798c62f41177fab7930ec 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelScalar.h +++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.h @@ -38,6 +38,8 @@ struct CSRKernelScalar ConstViewType getConstView() const; + static TNL::String getKernelType(); + template< typename OffsetsView, typename Fetch, typename Reduction, diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp index 7dd0f5cd7b87cc213585ad955e91e4b363ee3a9c..b5a396e1592e76ed75eee07b5bba3e342f0bc2ca 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp +++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp @@ -56,6 +56,15 @@ getConstView() const -> ConstViewType return *this; }; +template< typename Index, + typename Device > +TNL::String +CSRKernelScalar< Index, Device >:: +getKernelType() +{ + return "Scalar"; +} + template< typename Index, typename Device > template< typename OffsetsView, diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.h b/src/TNL/Algorithms/Segments/CSRKernelVector.h index 7a6ccf7ff71cea21829121450dd2e09fbbc68b0f..a5eb7721088afa39ce70c8468a381c2689ba5c7d 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelVector.h +++ b/src/TNL/Algorithms/Segments/CSRKernelVector.h @@ -24,35 +24,36 @@ template< typename Index, typename Device > struct CSRKernelVector { - using IndexType = Index; - using DeviceType = Device; - using ViewType = CSRKernelVector< Index, Device >; - using ConstViewType = CSRKernelVector< Index, Device >; - - template< typename Offsets > - void init( const Offsets& offsets ); - - void reset(); - - ViewType getView(); - - ConstViewType getConstView() const; - - - template< typename OffsetsView, - typename Fetch, - typename Reduction, - typename ResultKeeper, - typename Real, - typename... Args > - static void segmentsReduction( const OffsetsView& offsets, - Index first, - Index last, - Fetch& fetch, - const Reduction& reduction, - ResultKeeper& keeper, - const Real& zero, - Args... args ); + using IndexType = Index; + using DeviceType = Device; + using ViewType = CSRKernelVector< Index, Device >; + using ConstViewType = CSRKernelVector< Index, Device >; + + template< typename Offsets > + void init( const Offsets& offsets ); + + void reset(); + + ViewType getView(); + + ConstViewType getConstView() const; + + static TNL::String getKernelType(); + + template< typename OffsetsView, + typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real, + typename... Args > + static void segmentsReduction( const OffsetsView& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduction& reduction, + ResultKeeper& keeper, + const Real& zero, + Args... args ); }; } // namespace Segments diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp index d6f5bb7ec321b25ef89a4654197d86c985123706..faa0308648cd062a60257b346f14c83ce768a2fb 100644 --- a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp +++ b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp @@ -111,6 +111,14 @@ getConstView() const -> ConstViewType return *this; }; +template< typename Index, + typename Device > +TNL::String +CSRKernelVector< Index, Device >:: +getKernelType() +{ + return "Vector"; +} template< typename Index, typename Device > diff --git a/src/TNL/Algorithms/Segments/CSRView.hpp b/src/TNL/Algorithms/Segments/CSRView.hpp index 045b6bc5a2fe2c13a59cfcdcf7bc6d7234f76f17..8b1dce064e9e4dd086b61c5c3d9e46c4cc66086b 100644 --- a/src/TNL/Algorithms/Segments/CSRView.hpp +++ b/src/TNL/Algorithms/Segments/CSRView.hpp @@ -91,7 +91,7 @@ String CSRView< Device, Index, Kernel >:: getSegmentsType() { - return "CSR"; + return "CSR< " + KernelType::getKernelType() + " >"; } template< typename Device,