Loading src/Benchmarks/BLAS/vector-operations.h +16 −16 Original line number Diff line number Diff line Loading @@ -562,31 +562,31 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif //// // Inclusive prefix sum auto inclusivePrefixSumHost = [&]() { hostVector.prefixSum(); // Inclusive scan auto inclusiveScanHost = [&]() { hostVector.scan(); }; benchmark.setOperation( "inclusive prefix sum", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", inclusivePrefixSumHost ); benchmark.setOperation( "inclusive scan", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", inclusiveScanHost ); #ifdef HAVE_CUDA auto inclusivePrefixSumCuda = [&]() { deviceVector.prefixSum(); auto inclusiveScanCuda = [&]() { deviceVector.scan(); }; benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusivePrefixSumCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusiveScanCuda ); #endif //// // Exclusive prefix sum auto exclusivePrefixSumHost = [&]() { hostVector.template prefixSum< Algorithms::ScanType::Exclusive >(); // Exclusive scan auto exclusiveScanHost = [&]() { hostVector.template scan< Algorithms::ScanType::Exclusive >(); }; benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); benchmark.setOperation( "exclusive scan", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", exclusiveScanHost ); #ifdef HAVE_CUDA auto exclusivePrefixSumCuda = [&]() { deviceVector.template prefixSum< Algorithms::ScanType::Exclusive >(); auto exclusiveScanCuda = [&]() { deviceVector.template scan< Algorithms::ScanType::Exclusive >(); }; benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusiveScanCuda ); #endif #ifdef HAVE_CUDA Loading src/TNL/Algorithms/DistributedScan.h +1 −1 Original line number Diff line number Diff line Loading @@ -54,7 +54,7 @@ struct DistributedScan // NOTE: exchanging general data types does not work with MPI CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); // compute prefix-sum of the per-rank sums // compute the scan of the per-rank sums Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); // perform second phase: shift by the per-block and per-rank offsets Loading src/TNL/Containers/DistributedVector.h +1 −1 Original line number Diff line number Diff line Loading @@ -131,7 +131,7 @@ public: DistributedVector& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers Loading src/TNL/Containers/DistributedVector.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -273,7 +273,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVector< Real, Device, Index, Communicator >:: prefixSum( IndexType begin, IndexType end ) scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); Loading src/TNL/Containers/DistributedVectorView.h +1 −1 Original line number Diff line number Diff line Loading @@ -134,7 +134,7 @@ public: DistributedVectorView& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers Loading Loading
src/Benchmarks/BLAS/vector-operations.h +16 −16 Original line number Diff line number Diff line Loading @@ -562,31 +562,31 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif //// // Inclusive prefix sum auto inclusivePrefixSumHost = [&]() { hostVector.prefixSum(); // Inclusive scan auto inclusiveScanHost = [&]() { hostVector.scan(); }; benchmark.setOperation( "inclusive prefix sum", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", inclusivePrefixSumHost ); benchmark.setOperation( "inclusive scan", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", inclusiveScanHost ); #ifdef HAVE_CUDA auto inclusivePrefixSumCuda = [&]() { deviceVector.prefixSum(); auto inclusiveScanCuda = [&]() { deviceVector.scan(); }; benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusivePrefixSumCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusiveScanCuda ); #endif //// // Exclusive prefix sum auto exclusivePrefixSumHost = [&]() { hostVector.template prefixSum< Algorithms::ScanType::Exclusive >(); // Exclusive scan auto exclusiveScanHost = [&]() { hostVector.template scan< Algorithms::ScanType::Exclusive >(); }; benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); benchmark.setOperation( "exclusive scan", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", exclusiveScanHost ); #ifdef HAVE_CUDA auto exclusivePrefixSumCuda = [&]() { deviceVector.template prefixSum< Algorithms::ScanType::Exclusive >(); auto exclusiveScanCuda = [&]() { deviceVector.template scan< Algorithms::ScanType::Exclusive >(); }; benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusiveScanCuda ); #endif #ifdef HAVE_CUDA Loading
src/TNL/Algorithms/DistributedScan.h +1 −1 Original line number Diff line number Diff line Loading @@ -54,7 +54,7 @@ struct DistributedScan // NOTE: exchanging general data types does not work with MPI CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); // compute prefix-sum of the per-rank sums // compute the scan of the per-rank sums Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); // perform second phase: shift by the per-block and per-rank offsets Loading
src/TNL/Containers/DistributedVector.h +1 −1 Original line number Diff line number Diff line Loading @@ -131,7 +131,7 @@ public: DistributedVector& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers Loading
src/TNL/Containers/DistributedVector.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -273,7 +273,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVector< Real, Device, Index, Communicator >:: prefixSum( IndexType begin, IndexType end ) scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); Loading
src/TNL/Containers/DistributedVectorView.h +1 −1 Original line number Diff line number Diff line Loading @@ -134,7 +134,7 @@ public: DistributedVectorView& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > void prefixSum( IndexType begin = 0, IndexType end = 0 ); void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers Loading