From 91806e1de466b3c06aea63811c9dcb6672341079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Tue, 9 Feb 2021 20:16:45 +0100 Subject: [PATCH] Moving implementation of CSRAdaptiveKernel to .hpp file. --- .../Algorithms/Segments/CSRAdaptiveKernel.h | 94 ++---------- .../Algorithms/Segments/CSRAdaptiveKernel.hpp | 143 +++++++++++++++++- 2 files changed, 152 insertions(+), 85 deletions(-) diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h index 899e22ff9a..6b64f1a851 100644 --- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h +++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h @@ -63,10 +63,7 @@ struct CSRAdaptiveKernel using BlocksType = typename ViewType::BlocksType; using BlocksView = typename BlocksType::ViewType; - static TNL::String getKernelType() - { - return ViewType::getKernelType(); - }; + static TNL::String getKernelType(); static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256; @@ -93,84 +90,16 @@ struct CSRAdaptiveKernel const Offsets& offsets, const Index size, details::Type &type, - Index &sum ) - { - sum = 0; - for (Index current = start; current < size - 1; current++ ) - { - Index elements = offsets[ current + 1 ] - offsets[ current ]; - sum += elements; - if( sum > SHARED_PER_WARP ) - { - if( current - start > 0 ) // extra row - { - type = details::Type::STREAM; - return current; - } - else - { // one long row - if( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT ) - type = details::Type::VECTOR; - else - type = details::Type::LONG; - return current + 1; - } - } - } - type = details::Type::STREAM; - return size - 1; // return last row pointer - } + Index &sum ); template< typename Offsets > - void init( const Offsets& offsets ) - { - using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >; - HostOffsetsType hostOffsets( offsets ); - const Index rows = offsets.getSize(); - Index sum, start( 0 ), nextStart( 0 ); - - // Fill blocks - std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks; - inBlocks.reserve( rows ); - - while( nextStart != rows - 1 ) - { - details::Type type; - nextStart = findLimit( start, hostOffsets, rows, type, sum ); - - if( type == details::Type::LONG ) - { - const Index blocksCount = inBlocks.size(); - const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize(); - Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; - if( warpsLeft == 0 ) - warpsLeft = warpsPerCudaBlock; - for( Index index = 0; index < warpsLeft; index++ ) - inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft ); - } - else - { - inBlocks.emplace_back(start, type, - nextStart, - offsets.getElement(nextStart), - offsets.getElement(start) ); - } - start = nextStart; - } - inBlocks.emplace_back(nextStart); - this->blocks = inBlocks; - this->view.setBlocks( blocks ); - } - - void reset() - { - this->blocks.reset(); - this->view.setBlocks( blocks ); - } - - ViewType getView() { return this->view; }; - - ConstViewType getConstView() const { return this->view; }; + void init( const Offsets& offsets ); + + void reset(); + + ViewType getView(); + + ConstViewType getConstView() const; template< typename OffsetsView, typename Fetch, @@ -185,10 +114,7 @@ struct CSRAdaptiveKernel const Reduction& reduction, ResultKeeper& keeper, const Real& zero, - Args... args ) const - { - view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... ); - } + Args... args ) const; protected: BlocksType blocks; diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp index b795a52f57..4c53a83ca3 100644 --- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp +++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp @@ -22,7 +22,148 @@ namespace TNL { namespace Algorithms { namespace Segments { +template< typename Index, + typename Device > +TNL::String +CSRAdaptiveKernel< Index, Device >:: +getKernelType() +{ + return ViewType::getKernelType(); +}; + + +template< typename Index, + typename Device > + template< typename Offsets > +Index +CSRAdaptiveKernel< Index, Device >:: +findLimit( const Index start, + const Offsets& offsets, + const Index size, + details::Type &type, + Index &sum ) +{ + sum = 0; + for (Index current = start; current < size - 1; current++ ) + { + Index elements = offsets[ current + 1 ] - offsets[ current ]; + sum += elements; + if( sum > SHARED_PER_WARP ) + { + if( current - start > 0 ) // extra row + { + type = details::Type::STREAM; + return current; + } + else + { // one long row + if( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT ) + type = details::Type::VECTOR; + else + type = details::Type::LONG; + return current + 1; + } + } + } + type = details::Type::STREAM; + return size - 1; // return last row pointer + } + +template< typename Index, + typename Device > + template< typename Offsets > +void +CSRAdaptiveKernel< Index, Device >:: +init( const Offsets& offsets ) +{ + using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >; + HostOffsetsType hostOffsets( offsets ); + const Index rows = offsets.getSize(); + Index sum, start( 0 ), nextStart( 0 ); + + // Fill blocks + std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks; + inBlocks.reserve( rows ); + + while( nextStart != rows - 1 ) + { + details::Type type; + nextStart = findLimit( start, hostOffsets, rows, type, sum ); + + if( type == details::Type::LONG ) + { + const Index blocksCount = inBlocks.size(); + const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize(); + Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; + if( warpsLeft == 0 ) + warpsLeft = warpsPerCudaBlock; + for( Index index = 0; index < warpsLeft; index++ ) + inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft ); + } + else + { + inBlocks.emplace_back(start, type, + nextStart, + offsets.getElement(nextStart), + offsets.getElement(start) ); + } + start = nextStart; + } + inBlocks.emplace_back(nextStart); + this->blocks = inBlocks; + this->view.setBlocks( blocks ); +} + +template< typename Index, + typename Device > +void +CSRAdaptiveKernel< Index, Device >:: +reset() +{ + this->blocks.reset(); + this->view.setBlocks( blocks ); +} + +template< typename Index, + typename Device > +auto +CSRAdaptiveKernel< Index, Device >:: +getView() -> ViewType +{ + return this->view; +} + +template< typename Index, + typename Device > +auto +CSRAdaptiveKernel< Index, Device >:: +getConstView() const -> ConstViewType +{ + return this->view; +}; + +template< typename Index, + typename Device > + template< typename OffsetsView, + typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real, + typename... Args > +void +CSRAdaptiveKernel< Index, Device >:: +segmentsReduction( const OffsetsView& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduction& reduction, + ResultKeeper& keeper, + const Real& zero, + Args... args ) const +{ + view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... ); +} } // namespace Segments } // namespace Algorithms -} // namespace TNL \ No newline at end of file +} // namespace TNL -- GitLab