Loading src/TNL/Algorithms/Segments/CSR.h +2 −2 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename Kernel = CSRScalarKernel< Index, Device >, typename Kernel = CSRKernelScalar< Index, Device >, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class CSR { Loading Loading @@ -133,7 +133,7 @@ class CSR template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > using CSRScalar = CSR< Device, Index, CSRScalarKernel< Index, Device >, IndexAllocator >; using CSRScalar = CSR< Device, Index, CSRKernelScalar< Index, Device >, IndexAllocator >; template< typename Device, typename Index, Loading src/TNL/Algorithms/Segments/CSRKernelScalar.h 0 → 100644 +61 −0 Original line number Diff line number Diff line /*************************************************************************** CSRKernelScalar.h - description ------------------- begin : Jan 23, 2021 -> Joe Biden inauguration copyright : (C) 2021 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Assert.h> #include <TNL/Cuda/LaunchHelpers.h> #include <TNL/Containers/VectorView.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> namespace TNL { namespace Algorithms { namespace Segments { template< typename Index, typename Device > struct CSRKernelScalar { using IndexType = Index; using DeviceType = Device; using ViewType = CSRKernelScalar< Index, Device >; using ConstViewType = CSRKernelScalar< Index, Device >; template< typename Offsets > void init( const Offsets& offsets ); void reset(); ViewType getView(); ConstViewType getConstView() const; template< typename OffsetsView, typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > static void segmentsReduction( const OffsetsView& offsets, Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); }; } // namespace Segments } // namespace Algorithms } // namespace TNL #include <TNL/Algorithms/Segments/CSRKernelScalar.hpp> No newline at end of file src/TNL/Algorithms/Segments/CSRKernelScalar.hpp 0 → 100644 +92 −0 Original line number Diff line number Diff line /*************************************************************************** CSRKernelScalar.h - description ------------------- begin : Jan 23, 2021 -> Joe Biden inauguration copyright : (C) 2021 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Assert.h> #include <TNL/Cuda/LaunchHelpers.h> #include <TNL/Containers/VectorView.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Algorithms/Segments/CSRKernelScalar.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> namespace TNL { namespace Algorithms { namespace Segments { template< typename Index, typename Device > template< typename Offsets > void CSRKernelScalar< Index, Device >:: init( const Offsets& offsets ) { } template< typename Index, typename Device > void CSRKernelScalar< Index, Device >:: reset() { } template< typename Index, typename Device > auto CSRKernelScalar< Index, Device >:: getView() -> ViewType { return *this; } template< typename Index, typename Device > auto CSRKernelScalar< Index, Device >:: getConstView() const -> ConstViewType { return *this; }; template< typename Index, typename Device > template< typename OffsetsView, typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSRKernelScalar< Index, Device >:: segmentsReduction( const OffsetsView& offsets, Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsets[ segmentIdx ]; const IndexType end = offsets[ segmentIdx + 1 ]; Real aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } // namespace Segments } // namespace Algorithms } // namespace TNL src/TNL/Algorithms/Segments/CSRKernels.h +0 −47 Original line number Diff line number Diff line Loading @@ -20,53 +20,6 @@ namespace TNL { namespace Algorithms { namespace Segments { template< typename Index, typename Device > struct CSRScalarKernel { using IndexType = Index; using DeviceType = Device; using ViewType = CSRScalarKernel< Index, Device >; using ConstViewType = CSRScalarKernel< Index, Device >; template< typename Offsets > void init( const Offsets& offsets ) {}; void reset(){}; ViewType getView() { return *this; }; ConstViewType getConstView() const { return *this; }; template< typename OffsetsView, typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > static void segmentsReduction( const OffsetsView& offsets, Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsets[ segmentIdx ]; const IndexType end = offsets[ segmentIdx + 1 ]; Real aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } }; #ifdef HAVE_CUDA template< typename Offsets, typename Index, Loading src/TNL/Algorithms/Segments/CSRView.h +3 −2 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/SegmentView.h> #include <TNL/Algorithms/Segments/CSRKernelScalar.h> #include <TNL/Algorithms/Segments/CSRKernels.h> namespace TNL { Loading @@ -22,7 +23,7 @@ namespace TNL { template< typename Device, typename Index, typename Kernel = CSRScalarKernel< Index, Device > > typename Kernel = CSRKernelScalar< Index, Device > > class CSRView { public: Loading Loading @@ -131,7 +132,7 @@ class CSRView template< typename Device, typename Index > using CSRViewScalar = CSRView< Device, Index, CSRScalarKernel< Index, Device > >; using CSRViewScalar = CSRView< Device, Index, CSRKernelScalar< Index, Device > >; template< typename Device, typename Index > Loading Loading
src/TNL/Algorithms/Segments/CSR.h +2 −2 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename Kernel = CSRScalarKernel< Index, Device >, typename Kernel = CSRKernelScalar< Index, Device >, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class CSR { Loading Loading @@ -133,7 +133,7 @@ class CSR template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > using CSRScalar = CSR< Device, Index, CSRScalarKernel< Index, Device >, IndexAllocator >; using CSRScalar = CSR< Device, Index, CSRKernelScalar< Index, Device >, IndexAllocator >; template< typename Device, typename Index, Loading
src/TNL/Algorithms/Segments/CSRKernelScalar.h 0 → 100644 +61 −0 Original line number Diff line number Diff line /*************************************************************************** CSRKernelScalar.h - description ------------------- begin : Jan 23, 2021 -> Joe Biden inauguration copyright : (C) 2021 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Assert.h> #include <TNL/Cuda/LaunchHelpers.h> #include <TNL/Containers/VectorView.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> namespace TNL { namespace Algorithms { namespace Segments { template< typename Index, typename Device > struct CSRKernelScalar { using IndexType = Index; using DeviceType = Device; using ViewType = CSRKernelScalar< Index, Device >; using ConstViewType = CSRKernelScalar< Index, Device >; template< typename Offsets > void init( const Offsets& offsets ); void reset(); ViewType getView(); ConstViewType getConstView() const; template< typename OffsetsView, typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > static void segmentsReduction( const OffsetsView& offsets, Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); }; } // namespace Segments } // namespace Algorithms } // namespace TNL #include <TNL/Algorithms/Segments/CSRKernelScalar.hpp> No newline at end of file
src/TNL/Algorithms/Segments/CSRKernelScalar.hpp 0 → 100644 +92 −0 Original line number Diff line number Diff line /*************************************************************************** CSRKernelScalar.h - description ------------------- begin : Jan 23, 2021 -> Joe Biden inauguration copyright : (C) 2021 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Assert.h> #include <TNL/Cuda/LaunchHelpers.h> #include <TNL/Containers/VectorView.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Algorithms/Segments/CSRKernelScalar.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> namespace TNL { namespace Algorithms { namespace Segments { template< typename Index, typename Device > template< typename Offsets > void CSRKernelScalar< Index, Device >:: init( const Offsets& offsets ) { } template< typename Index, typename Device > void CSRKernelScalar< Index, Device >:: reset() { } template< typename Index, typename Device > auto CSRKernelScalar< Index, Device >:: getView() -> ViewType { return *this; } template< typename Index, typename Device > auto CSRKernelScalar< Index, Device >:: getConstView() const -> ConstViewType { return *this; }; template< typename Index, typename Device > template< typename OffsetsView, typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSRKernelScalar< Index, Device >:: segmentsReduction( const OffsetsView& offsets, Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsets[ segmentIdx ]; const IndexType end = offsets[ segmentIdx + 1 ]; Real aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } // namespace Segments } // namespace Algorithms } // namespace TNL
src/TNL/Algorithms/Segments/CSRKernels.h +0 −47 Original line number Diff line number Diff line Loading @@ -20,53 +20,6 @@ namespace TNL { namespace Algorithms { namespace Segments { template< typename Index, typename Device > struct CSRScalarKernel { using IndexType = Index; using DeviceType = Device; using ViewType = CSRScalarKernel< Index, Device >; using ConstViewType = CSRScalarKernel< Index, Device >; template< typename Offsets > void init( const Offsets& offsets ) {}; void reset(){}; ViewType getView() { return *this; }; ConstViewType getConstView() const { return *this; }; template< typename OffsetsView, typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > static void segmentsReduction( const OffsetsView& offsets, Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsets[ segmentIdx ]; const IndexType end = offsets[ segmentIdx + 1 ]; Real aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } }; #ifdef HAVE_CUDA template< typename Offsets, typename Index, Loading
src/TNL/Algorithms/Segments/CSRView.h +3 −2 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/SegmentView.h> #include <TNL/Algorithms/Segments/CSRKernelScalar.h> #include <TNL/Algorithms/Segments/CSRKernels.h> namespace TNL { Loading @@ -22,7 +23,7 @@ namespace TNL { template< typename Device, typename Index, typename Kernel = CSRScalarKernel< Index, Device > > typename Kernel = CSRKernelScalar< Index, Device > > class CSRView { public: Loading Loading @@ -131,7 +132,7 @@ class CSRView template< typename Device, typename Index > using CSRViewScalar = CSRView< Device, Index, CSRScalarKernel< Index, Device > >; using CSRViewScalar = CSRView< Device, Index, CSRKernelScalar< Index, Device > >; template< typename Device, typename Index > Loading