From 92dc4a4720d74e31eeb8cb2e7e0ee724c7f057e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Tue, 27 Aug 2019 15:41:31 +0200 Subject: [PATCH] Renaming PrefixSum to Scan. --- .../CMakeLists.txt | 0 .../ComparisonExample.cpp | 0 .../ComparisonExample.cu | 0 .../MapReduceExample-1.cpp | 0 .../MapReduceExample-2.cpp | 0 .../MapReduceExample-3.cpp | 0 .../MaximumNormExample.cpp | 0 .../MaximumNormExample.cu | 0 .../ProductExample.cpp | 0 .../ProductExample.cu | 0 .../ScalarProductExample.cpp | 0 .../ScalarProductExample.cu | 0 .../SequentialSum.cpp | 0 .../SequentialSumWithLambdas.cpp | 0 .../SumExample.cpp | 0 .../SumExample.cu | 0 .../UpdateAndResidueExample.cpp | 0 .../UpdateAndResidueExample.cu | 0 .../tutorial_03_Reduction.md | 0 src/Benchmarks/BLAS/vector-operations.h | 4 +- ...CudaPrefixSumKernel.h => CudaScanKernel.h} | 22 +++--- ...stributedPrefixSum.h => DistributedScan.h} | 14 ++-- .../Algorithms/{PrefixSum.h => Scan.h} | 30 ++++---- .../Algorithms/{PrefixSum.hpp => Scan.hpp} | 50 ++++++------- src/TNL/Containers/DistributedVector.h | 2 +- src/TNL/Containers/DistributedVector.hpp | 6 +- src/TNL/Containers/DistributedVectorView.h | 2 +- src/TNL/Containers/DistributedVectorView.hpp | 6 +- src/TNL/Containers/Vector.h | 8 +-- src/TNL/Containers/Vector.hpp | 12 ++-- src/TNL/Containers/VectorView.h | 10 +-- src/TNL/Containers/VectorView.hpp | 12 ++-- src/TNL/Matrices/BiEllpack_impl.h | 2 +- src/TNL/Matrices/CSR_impl.h | 2 +- .../Matrices/SlicedEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/SlicedEllpack_impl.h | 2 +- .../Containers/DistributedVectorTest.h | 56 +++++++-------- .../Containers/VectorPrefixSumTest.h | 72 +++++++++---------- 38 files changed, 157 insertions(+), 157 deletions(-) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/CMakeLists.txt (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/ComparisonExample.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/ComparisonExample.cu (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/MapReduceExample-1.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/MapReduceExample-2.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/MapReduceExample-3.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/MaximumNormExample.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/MaximumNormExample.cu (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/ProductExample.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/ProductExample.cu (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/ScalarProductExample.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/ScalarProductExample.cu (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/SequentialSum.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/SequentialSumWithLambdas.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/SumExample.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/SumExample.cu (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/UpdateAndResidueExample.cpp (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/UpdateAndResidueExample.cu (100%) rename Documentation/Tutorials/{Reduction => ReductionAndScan}/tutorial_03_Reduction.md (100%) rename src/TNL/Containers/Algorithms/{CudaPrefixSumKernel.h => CudaScanKernel.h} (95%) rename src/TNL/Containers/Algorithms/{DistributedPrefixSum.h => DistributedScan.h} (82%) rename src/TNL/Containers/Algorithms/{PrefixSum.h => Scan.h} (85%) rename src/TNL/Containers/Algorithms/{PrefixSum.hpp => Scan.hpp} (88%) diff --git a/Documentation/Tutorials/Reduction/CMakeLists.txt b/Documentation/Tutorials/ReductionAndScan/CMakeLists.txt similarity index 100% rename from Documentation/Tutorials/Reduction/CMakeLists.txt rename to Documentation/Tutorials/ReductionAndScan/CMakeLists.txt diff --git a/Documentation/Tutorials/Reduction/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/ComparisonExample.cpp rename to Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp diff --git a/Documentation/Tutorials/Reduction/ComparisonExample.cu b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cu similarity index 100% rename from Documentation/Tutorials/Reduction/ComparisonExample.cu rename to Documentation/Tutorials/ReductionAndScan/ComparisonExample.cu diff --git a/Documentation/Tutorials/Reduction/MapReduceExample-1.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/MapReduceExample-1.cpp rename to Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp diff --git a/Documentation/Tutorials/Reduction/MapReduceExample-2.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/MapReduceExample-2.cpp rename to Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp diff --git a/Documentation/Tutorials/Reduction/MapReduceExample-3.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/MapReduceExample-3.cpp rename to Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp diff --git a/Documentation/Tutorials/Reduction/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/MaximumNormExample.cpp rename to Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp diff --git a/Documentation/Tutorials/Reduction/MaximumNormExample.cu b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cu similarity index 100% rename from Documentation/Tutorials/Reduction/MaximumNormExample.cu rename to Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cu diff --git a/Documentation/Tutorials/Reduction/ProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/ProductExample.cpp rename to Documentation/Tutorials/ReductionAndScan/ProductExample.cpp diff --git a/Documentation/Tutorials/Reduction/ProductExample.cu b/Documentation/Tutorials/ReductionAndScan/ProductExample.cu similarity index 100% rename from Documentation/Tutorials/Reduction/ProductExample.cu rename to Documentation/Tutorials/ReductionAndScan/ProductExample.cu diff --git a/Documentation/Tutorials/Reduction/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/ScalarProductExample.cpp rename to Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp diff --git a/Documentation/Tutorials/Reduction/ScalarProductExample.cu b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cu similarity index 100% rename from Documentation/Tutorials/Reduction/ScalarProductExample.cu rename to Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cu diff --git a/Documentation/Tutorials/Reduction/SequentialSum.cpp b/Documentation/Tutorials/ReductionAndScan/SequentialSum.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/SequentialSum.cpp rename to Documentation/Tutorials/ReductionAndScan/SequentialSum.cpp diff --git a/Documentation/Tutorials/Reduction/SequentialSumWithLambdas.cpp b/Documentation/Tutorials/ReductionAndScan/SequentialSumWithLambdas.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/SequentialSumWithLambdas.cpp rename to Documentation/Tutorials/ReductionAndScan/SequentialSumWithLambdas.cpp diff --git a/Documentation/Tutorials/Reduction/SumExample.cpp b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/SumExample.cpp rename to Documentation/Tutorials/ReductionAndScan/SumExample.cpp diff --git a/Documentation/Tutorials/Reduction/SumExample.cu b/Documentation/Tutorials/ReductionAndScan/SumExample.cu similarity index 100% rename from Documentation/Tutorials/Reduction/SumExample.cu rename to Documentation/Tutorials/ReductionAndScan/SumExample.cu diff --git a/Documentation/Tutorials/Reduction/UpdateAndResidueExample.cpp b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp similarity index 100% rename from Documentation/Tutorials/Reduction/UpdateAndResidueExample.cpp rename to Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp diff --git a/Documentation/Tutorials/Reduction/UpdateAndResidueExample.cu b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cu similarity index 100% rename from Documentation/Tutorials/Reduction/UpdateAndResidueExample.cu rename to Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cu diff --git a/Documentation/Tutorials/Reduction/tutorial_03_Reduction.md b/Documentation/Tutorials/ReductionAndScan/tutorial_03_Reduction.md similarity index 100% rename from Documentation/Tutorials/Reduction/tutorial_03_Reduction.md rename to Documentation/Tutorials/ReductionAndScan/tutorial_03_Reduction.md diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index f2b22c7dfa..ce2114f313 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -578,13 +578,13 @@ benchmarkVectorOperations( Benchmark & benchmark, //// // Exclusive prefix sum auto exclusivePrefixSumHost = [&]() { - hostVector.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); + hostVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); }; benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); #ifdef HAVE_CUDA auto exclusivePrefixSumCuda = [&]() { - deviceVector.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); + deviceVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); }; benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); #endif diff --git a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h b/src/TNL/Containers/Algorithms/CudaScanKernel.h similarity index 95% rename from src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h rename to src/TNL/Containers/Algorithms/CudaScanKernel.h index ae3bb84de5..52ea4a32be 100644 --- a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h +++ b/src/TNL/Containers/Algorithms/CudaScanKernel.h @@ -1,5 +1,5 @@ /*************************************************************************** - CudaPrefixSumKernel.h - description + CudaScanKernel.h - description ------------------- begin : Jan 18, 2014 copyright : (C) 2014 by Tomas Oberhuber @@ -27,7 +27,7 @@ template< typename Real, typename Reduction, typename Index > __global__ void -cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, +cudaFirstPhaseBlockScan( const ScanType ScanType, Reduction reduction, const Real zero, const Index size, @@ -48,7 +48,7 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, */ const int blockOffset = blockIdx.x * elementsInBlock; int idx = threadIdx.x; - if( prefixSumType == PrefixSumType::Exclusive ) + if( ScanType == ScanType::Exclusive ) { if( idx == 0 ) sharedData[ 0 ] = zero; @@ -145,7 +145,7 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, if( threadIdx.x == 0 ) { - if( prefixSumType == PrefixSumType::Exclusive ) + if( ScanType == ScanType::Exclusive ) { auxArray[ blockIdx.x ] = reduction( sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ], sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); @@ -159,7 +159,7 @@ template< typename Real, typename Reduction, typename Index > __global__ void -cudaSecondPhaseBlockPrefixSum( Reduction reduction, +cudaSecondPhaseBlockScan( Reduction reduction, const Index size, const int elementsInBlock, const Index gridIdx, @@ -179,10 +179,10 @@ cudaSecondPhaseBlockPrefixSum( Reduction reduction, } } -template< PrefixSumType prefixSumType, +template< ScanType ScanType, typename Real, typename Index > -struct CudaPrefixSumKernelLauncher +struct CudaScanKernelLauncher { /**** * \brief Performs both phases of prefix sum. @@ -270,8 +270,8 @@ struct CudaPrefixSumKernelLauncher const std::size_t sharedDataSize = elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( Real ); - cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>> - ( prefixSumType, + cudaFirstPhaseBlockScan<<< cudaGridSize, cudaBlockSize, sharedMemory >>> + ( ScanType, reduction, zero, currentSize, @@ -288,7 +288,7 @@ struct CudaPrefixSumKernelLauncher // blockSums now contains sums of numbers in each block. The first phase // ends by computing prefix-sum of this array. if( numberOfBlocks > 1 ) { - CudaPrefixSumKernelLauncher< PrefixSumType::Inclusive, Real, Index >::perform( + CudaScanKernelLauncher< ScanType::Inclusive, Real, Index >::perform( blockSums.getSize(), blockSums.getData(), blockSums.getData(), @@ -347,7 +347,7 @@ struct CudaPrefixSumKernelLauncher cudaGridSize.x = roundUpDivision( currentSize, elementsInBlock ); // run the kernel - cudaSecondPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize >>> + cudaSecondPhaseBlockScan<<< cudaGridSize, cudaBlockSize >>> ( reduction, size, elementsInBlock, diff --git a/src/TNL/Containers/Algorithms/DistributedPrefixSum.h b/src/TNL/Containers/Algorithms/DistributedScan.h similarity index 82% rename from src/TNL/Containers/Algorithms/DistributedPrefixSum.h rename to src/TNL/Containers/Algorithms/DistributedScan.h index b81e2ac94d..6466b6bd5f 100644 --- a/src/TNL/Containers/Algorithms/DistributedPrefixSum.h +++ b/src/TNL/Containers/Algorithms/DistributedScan.h @@ -1,5 +1,5 @@ /*************************************************************************** - PrefixSum.h - description + Scan.h - description ------------------- begin : Aug 16, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. @@ -12,15 +12,15 @@ #pragma once -#include <TNL/Containers/Algorithms/PrefixSum.h> +#include <TNL/Containers/Algorithms/Scan.h> #include <TNL/Containers/Vector.h> namespace TNL { namespace Containers { namespace Algorithms { -template< PrefixSumType Type > -struct DistributedPrefixSum +template< ScanType Type > +struct DistributedScan { template< typename DistributedVector, typename Reduction > @@ -44,7 +44,7 @@ struct DistributedPrefixSum // perform first phase on the local data auto localView = v.getLocalView(); - const auto blockShifts = PrefixSum< DeviceType, Type >::performFirstPhase( localView, begin, end, reduction, zero ); + const auto blockShifts = Scan< DeviceType, Type >::performFirstPhase( localView, begin, end, reduction, zero ); const RealType localSum = blockShifts.getElement( blockShifts.getSize() - 1 ); // exchange local sums between ranks @@ -56,11 +56,11 @@ struct DistributedPrefixSum CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); // compute prefix-sum of the per-rank sums - PrefixSum< Devices::Host, PrefixSumType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); + Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); // perform second phase: shift by the per-block and per-rank offsets const int rank = CommunicatorType::GetRank( group ); - PrefixSum< DeviceType, Type >::performSecondPhase( localView, blockShifts, begin, end, reduction, rankSums[ rank ] ); + Scan< DeviceType, Type >::performSecondPhase( localView, blockShifts, begin, end, reduction, rankSums[ rank ] ); } } }; diff --git a/src/TNL/Containers/Algorithms/PrefixSum.h b/src/TNL/Containers/Algorithms/Scan.h similarity index 85% rename from src/TNL/Containers/Algorithms/PrefixSum.h rename to src/TNL/Containers/Algorithms/Scan.h index 2b0e404580..9b9b116c87 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.h +++ b/src/TNL/Containers/Algorithms/Scan.h @@ -1,5 +1,5 @@ /*************************************************************************** - PrefixSum.h - description + Scan.h - description ------------------- begin : May 9, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. @@ -19,22 +19,22 @@ namespace TNL { namespace Containers { namespace Algorithms { -enum class PrefixSumType { +enum class ScanType { Exclusive, Inclusive }; template< typename Device, - PrefixSumType Type = PrefixSumType::Inclusive > -struct PrefixSum; + ScanType Type = ScanType::Inclusive > +struct Scan; template< typename Device, - PrefixSumType Type = PrefixSumType::Inclusive > -struct SegmentedPrefixSum; + ScanType Type = ScanType::Inclusive > +struct SegmentedScan; -template< PrefixSumType Type > -struct PrefixSum< Devices::Host, Type > +template< ScanType Type > +struct Scan< Devices::Host, Type > { template< typename Vector, typename Reduction > @@ -66,8 +66,8 @@ struct PrefixSum< Devices::Host, Type > const typename Vector::RealType shift ); }; -template< PrefixSumType Type > -struct PrefixSum< Devices::Cuda, Type > +template< ScanType Type > +struct Scan< Devices::Cuda, Type > { template< typename Vector, typename Reduction > @@ -99,8 +99,8 @@ struct PrefixSum< Devices::Cuda, Type > const typename Vector::RealType shift ); }; -template< PrefixSumType Type > -struct SegmentedPrefixSum< Devices::Host, Type > +template< ScanType Type > +struct SegmentedScan< Devices::Host, Type > { template< typename Vector, typename Reduction, @@ -114,8 +114,8 @@ struct SegmentedPrefixSum< Devices::Host, Type > const typename Vector::RealType zero ); }; -template< PrefixSumType Type > -struct SegmentedPrefixSum< Devices::Cuda, Type > +template< ScanType Type > +struct SegmentedScan< Devices::Cuda, Type > { template< typename Vector, typename Reduction, @@ -133,4 +133,4 @@ struct SegmentedPrefixSum< Devices::Cuda, Type > } // namespace Containers } // namespace TNL -#include <TNL/Containers/Algorithms/PrefixSum.hpp> +#include <TNL/Containers/Algorithms/Scan.hpp> diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/Scan.hpp similarity index 88% rename from src/TNL/Containers/Algorithms/PrefixSum.hpp rename to src/TNL/Containers/Algorithms/Scan.hpp index 8af19d09a5..3afc2e2715 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/Scan.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - PrefixSum.hpp - description + Scan.hpp - description ------------------- begin : Mar 24, 2013 copyright : (C) 2013 by Tomas Oberhuber et al. @@ -12,11 +12,11 @@ #pragma once -#include "PrefixSum.h" +#include "Scan.h" #include <TNL/Assert.h> #include <TNL/Containers/Array.h> -#include <TNL/Containers/Algorithms/CudaPrefixSumKernel.h> +#include <TNL/Containers/Algorithms/CudaScanKernel.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Exceptions/NotImplementedError.h> @@ -24,11 +24,11 @@ namespace TNL { namespace Containers { namespace Algorithms { -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename Reduction > void -PrefixSum< Devices::Host, Type >:: +Scan< Devices::Host, Type >:: perform( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end, @@ -44,11 +44,11 @@ perform( Vector& v, #endif } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename Reduction > auto -PrefixSum< Devices::Host, Type >:: +Scan< Devices::Host, Type >:: performFirstPhase( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end, @@ -70,7 +70,7 @@ performFirstPhase( Vector& v, RealType block_sum = zero; // perform prefix-sum on blocks statically assigned to threads - if( Type == PrefixSumType::Inclusive ) { + if( Type == ScanType::Inclusive ) { #pragma omp for schedule(static) for( IndexType i = begin; i < end; i++ ) { block_sum = reduction( block_sum, v[ i ] ); @@ -98,7 +98,7 @@ performFirstPhase( Vector& v, // block_sums now contains shift values for each block - to be used in the second phase return block_sums; #else - if( Type == PrefixSumType::Inclusive ) { + if( Type == ScanType::Inclusive ) { for( IndexType i = begin + 1; i < end; i++ ) v[ i ] = reduction( v[ i ], v[ i - 1 ] ); } @@ -116,12 +116,12 @@ performFirstPhase( Vector& v, #endif } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename BlockShifts, typename Reduction > void -PrefixSum< Devices::Host, Type >:: +Scan< Devices::Host, Type >:: performSecondPhase( Vector& v, const BlockShifts& blockShifts, const typename Vector::IndexType begin, @@ -152,11 +152,11 @@ performSecondPhase( Vector& v, #endif } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename Reduction > void -PrefixSum< Devices::Cuda, Type >:: +Scan< Devices::Cuda, Type >:: perform( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end, @@ -167,7 +167,7 @@ perform( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - CudaPrefixSumKernelLauncher< Type, RealType, IndexType >::perform( + CudaScanKernelLauncher< Type, RealType, IndexType >::perform( end - begin, &v[ begin ], // input &v[ begin ], // output @@ -178,11 +178,11 @@ perform( Vector& v, #endif } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename Reduction > auto -PrefixSum< Devices::Cuda, Type >:: +Scan< Devices::Cuda, Type >:: performFirstPhase( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end, @@ -193,7 +193,7 @@ performFirstPhase( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - return CudaPrefixSumKernelLauncher< Type, RealType, IndexType >::performFirstPhase( + return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase( end - begin, &v[ begin ], // input &v[ begin ], // output @@ -204,12 +204,12 @@ performFirstPhase( Vector& v, #endif } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename BlockShifts, typename Reduction > void -PrefixSum< Devices::Cuda, Type >:: +Scan< Devices::Cuda, Type >:: performSecondPhase( Vector& v, const BlockShifts& blockShifts, const typename Vector::IndexType begin, @@ -221,7 +221,7 @@ performSecondPhase( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - CudaPrefixSumKernelLauncher< Type, RealType, IndexType >::performSecondPhase( + CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase( end - begin, &v[ begin ], // output blockShifts.getData(), @@ -233,12 +233,12 @@ performSecondPhase( Vector& v, } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename Reduction, typename Flags > void -SegmentedPrefixSum< Devices::Host, Type >:: +SegmentedScan< Devices::Host, Type >:: perform( Vector& v, Flags& flags, const typename Vector::IndexType begin, @@ -250,7 +250,7 @@ perform( Vector& v, using IndexType = typename Vector::IndexType; // TODO: parallelize with OpenMP - if( Type == PrefixSumType::Inclusive ) + if( Type == ScanType::Inclusive ) { for( IndexType i = begin + 1; i < end; i++ ) if( ! flags[ i ] ) @@ -271,12 +271,12 @@ perform( Vector& v, } } -template< PrefixSumType Type > +template< ScanType Type > template< typename Vector, typename Reduction, typename Flags > void -SegmentedPrefixSum< Devices::Cuda, Type >:: +SegmentedScan< Devices::Cuda, Type >:: perform( Vector& v, Flags& flags, const typename Vector::IndexType begin, diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h index 3438ddbd00..51d7c537c5 100644 --- a/src/TNL/Containers/DistributedVector.h +++ b/src/TNL/Containers/DistributedVector.h @@ -127,7 +127,7 @@ public: typename = std::enable_if_t< HasSubscriptOperator<Vector>::value > > DistributedVector& operator/=( const Vector& vector ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive > + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); }; diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index 0a6ac1547f..dbf8b10b8e 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -13,7 +13,7 @@ #pragma once #include "DistributedVector.h" -#include <TNL/Containers/Algorithms/DistributedPrefixSum.h> +#include <TNL/Containers/Algorithms/DistributedScan.h> namespace TNL { namespace Containers { @@ -298,14 +298,14 @@ template< typename Real, typename Device, typename Index, typename Communicator > - template< Algorithms::PrefixSumType Type > + template< Algorithms::ScanType Type > void DistributedVector< Real, Device, Index, Communicator >:: prefixSum( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); - Algorithms::DistributedPrefixSum< Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); + Algorithms::DistributedScan< Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); } } // namespace Containers diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h index f0e7d91278..99764432de 100644 --- a/src/TNL/Containers/DistributedVectorView.h +++ b/src/TNL/Containers/DistributedVectorView.h @@ -127,7 +127,7 @@ public: typename = std::enable_if_t< HasSubscriptOperator<Vector>::value > > DistributedVectorView& operator/=( const Vector& vector ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive > + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); }; diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 0268e35da5..6a934d8c25 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -13,7 +13,7 @@ #pragma once #include "DistributedVectorView.h" -#include <TNL/Containers/Algorithms/DistributedPrefixSum.h> +#include <TNL/Containers/Algorithms/DistributedScan.h> namespace TNL { namespace Containers { @@ -274,14 +274,14 @@ template< typename Real, typename Device, typename Index, typename Communicator > - template< Algorithms::PrefixSumType Type > + template< Algorithms::ScanType Type > void DistributedVectorView< Real, Device, Index, Communicator >:: prefixSum( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); - Algorithms::DistributedPrefixSum< Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); + Algorithms::DistributedScan< Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); } } // namespace Containers diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 1fb6a01aa1..31a3972bb4 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -209,18 +209,18 @@ public: * \param begin Index of the element in this vector which to begin with. * \param end Index of the element in this vector which to end with. */ - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive > + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index a8c626ee5e..0468fc7491 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -168,21 +168,21 @@ template< typename Real, typename Device, typename Index, typename Allocator > - template< Algorithms::PrefixSumType Type > + template< Algorithms::ScanType Type > void Vector< Real, Device, Index, Allocator >:: prefixSum( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); - Algorithms::PrefixSum< DeviceType, Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); + Algorithms::Scan< DeviceType, Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); } template< typename Real, typename Device, typename Index, typename Allocator > - template< Algorithms::PrefixSumType Type, + template< Algorithms::ScanType Type, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: @@ -190,14 +190,14 @@ segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); - Algorithms::SegmentedPrefixSum< DeviceType, Type >::perform( *this, flags, begin, end, std::plus<>{}, (RealType) 0.0 ); + Algorithms::SegmentedScan< DeviceType, Type >::perform( *this, flags, begin, end, std::plus<>{}, (RealType) 0.0 ); } template< typename Real, typename Device, typename Index, typename Allocator > - template< Algorithms::PrefixSumType Type, + template< Algorithms::ScanType Type, typename VectorExpression > void Vector< Real, Device, Index, Allocator >:: @@ -210,7 +210,7 @@ template< typename Real, typename Device, typename Index, typename Allocator > - template< Algorithms::PrefixSumType Type, + template< Algorithms::ScanType Type, typename VectorExpression, typename FlagsArray > void diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 0d03954549..f133d9c102 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -14,7 +14,7 @@ #include <TNL/Containers/ArrayView.h> #include <TNL/Containers/Expressions/ExpressionTemplates.h> -#include <TNL/Containers/Algorithms/PrefixSum.h> +#include <TNL/Containers/Algorithms/Scan.h> namespace TNL { namespace Containers { @@ -198,18 +198,18 @@ public: * \param begin Index of the element in this vector view which to begin with. * \param end Index of the element in this vector view which to end with. */ - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive > + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); - template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 9cdde8ef2a..7c342703bc 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -118,20 +118,20 @@ operator/=( const VectorExpression& expression ) template< typename Real, typename Device, typename Index > - template< Algorithms::PrefixSumType Type > + template< Algorithms::ScanType Type > void VectorView< Real, Device, Index >:: prefixSum( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); - Algorithms::PrefixSum< DeviceType, Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); + Algorithms::Scan< DeviceType, Type >::perform( *this, begin, end, std::plus<>{}, (RealType) 0.0 ); } template< typename Real, typename Device, typename Index > - template< Algorithms::PrefixSumType Type, + template< Algorithms::ScanType Type, typename FlagsArray > void VectorView< Real, Device, Index >:: @@ -139,13 +139,13 @@ segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); - Algorithms::SegmentedPrefixSum< DeviceType, Type >::perform( *this, flags, begin, end, std::plus<>{}, (RealType) 0.0 ); + Algorithms::SegmentedScan< DeviceType, Type >::perform( *this, flags, begin, end, std::plus<>{}, (RealType) 0.0 ); } template< typename Real, typename Device, typename Index > - template< Algorithms::PrefixSumType Type, + template< Algorithms::ScanType Type, typename VectorExpression > void VectorView< Real, Device, Index >:: @@ -157,7 +157,7 @@ prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) template< typename Real, typename Device, typename Index > - template< Algorithms::PrefixSumType Type, + template< Algorithms::ScanType Type, typename VectorExpression, typename FlagsArray > void diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index dc520cc912..0be6ac4b06 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -116,7 +116,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); - this->groupPointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); + this->groupPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); // uncomment to perform structure test //DeviceDependentCode::verifyRowPerm( *this, rowLengths ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 9c499c60f4..74ff682fda 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -104,7 +104,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng rowPtrs.bind( this->rowPointers.getData(), this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); - this->rowPointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); + this->rowPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); this->maxRowLength = max( rowLengths ); /**** diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 5792abfe52..c9dee062c9 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -80,7 +80,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); + this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 762a4fafa9..016edf6996 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -97,7 +97,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); + this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/UnitTests/Containers/DistributedVectorTest.h b/src/UnitTests/Containers/DistributedVectorTest.h index 430a42d5ec..9740063844 100644 --- a/src/UnitTests/Containers/DistributedVectorTest.h +++ b/src/UnitTests/Containers/DistributedVectorTest.h @@ -143,12 +143,12 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::maxGridSize() = 3; + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; v.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); @@ -156,7 +156,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; v.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); @@ -164,7 +164,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; v.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -173,7 +173,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; v_view.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); @@ -181,7 +181,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; v_view.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); @@ -189,12 +189,12 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; v_view.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::resetMaxGridSize(); + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize(); #endif } } @@ -218,21 +218,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -240,21 +240,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -264,28 +264,28 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::maxGridSize() = 3; + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ); setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -293,29 +293,29 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ); setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::resetMaxGridSize(); + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize(); #endif } } diff --git a/src/UnitTests/Containers/VectorPrefixSumTest.h b/src/UnitTests/Containers/VectorPrefixSumTest.h index 4659d365d8..be295001ad 100644 --- a/src/UnitTests/Containers/VectorPrefixSumTest.h +++ b/src/UnitTests/Containers/VectorPrefixSumTest.h @@ -82,12 +82,12 @@ TYPED_TEST( VectorTest, prefixSum ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::maxGridSize() = 3; + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; v.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; @@ -95,7 +95,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; v.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; @@ -103,7 +103,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; v.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -112,7 +112,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; v_view.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; @@ -120,7 +120,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; v_view.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; @@ -128,17 +128,17 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; v_view.prefixSum(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::resetMaxGridSize(); + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize(); #endif } } -TYPED_TEST( VectorTest, exclusivePrefixSum ) +TYPED_TEST( VectorTest, exclusiveScan ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; @@ -158,21 +158,21 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -180,21 +180,21 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -204,28 +204,28 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::maxGridSize() = 3; + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -233,29 +233,29 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); - EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); + v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; - Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::resetMaxGridSize(); + Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize(); #endif } } @@ -271,7 +271,7 @@ void setupFlags( FlagsView& f ) } /* -TYPED_TEST( VectorTest, segmentedPrefixSum ) +TYPED_TEST( VectorTest, segmentedScan ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; @@ -293,19 +293,19 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) flags_copy = flags_view; v = 0; - v.computeSegmentedPrefixSum( flags_view ); + v.computeSegmentedScan( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); flags_view = flags_copy; v = 1; - v.computeSegmentedPrefixSum( flags_view ); + v.computeSegmentedScan( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), ( i % 5 ) + 1 ); flags_view = flags_copy; setLinearSequence( v ); - v.computeSegmentedPrefixSum( flags_view ); + v.computeSegmentedScan( flags_view ); for( int i = 1; i < size; i++ ) { if( flags.getElement( i ) ) @@ -316,20 +316,20 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) flags_view = flags_copy; v_view = 0; - v_view.computeSegmentedPrefixSum( flags_view ); + v_view.computeSegmentedScan( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v_view.getElement( i ), 0 ); flags_view = flags_copy; v_view = 1; - v_view.computeSegmentedPrefixSum( flags_view ); + v_view.computeSegmentedScan( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v_view.getElement( i ), ( i % 5 ) + 1 ); flags_view = flags_copy; //v_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return i; } ); setLinearSequence( v ); - v_view.computeSegmentedPrefixSum( flags_view ); + v_view.computeSegmentedScan( flags_view ); for( int i = 1; i < size; i++ ) { if( flags.getElement( i ) ) -- GitLab