From afba52d9805c5f8af2c0dfcafd4dc79e56571ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz> Date: Sun, 13 Oct 2019 09:26:22 +0200 Subject: [PATCH] Renamed prefixSum methods to scan Closes #49 --- src/Benchmarks/BLAS/vector-operations.h | 32 +++---- src/TNL/Algorithms/DistributedScan.h | 2 +- src/TNL/Containers/DistributedVector.h | 2 +- src/TNL/Containers/DistributedVector.hpp | 2 +- src/TNL/Containers/DistributedVectorView.h | 2 +- src/TNL/Containers/DistributedVectorView.hpp | 2 +- src/TNL/Containers/Vector.h | 95 ++++++++++--------- src/TNL/Containers/Vector.hpp | 12 +-- src/TNL/Containers/VectorView.h | 95 ++++++++++--------- src/TNL/Containers/VectorView.hpp | 12 +-- src/TNL/Matrices/BiEllpack_impl.h | 2 +- src/TNL/Matrices/CSR_impl.h | 2 +- src/TNL/Matrices/ChunkedEllpack_impl.h | 2 +- .../Matrices/SlicedEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/SlicedEllpack_impl.h | 2 +- .../Containers/DistributedVectorTest.h | 54 +++++------ .../Containers/VectorPrefixSumTest.h | 52 +++++----- 17 files changed, 195 insertions(+), 177 deletions(-) diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index 5f5cd989f8..7254ba9f40 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -562,31 +562,31 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif //// - // Inclusive prefix sum - auto inclusivePrefixSumHost = [&]() { - hostVector.prefixSum(); + // Inclusive scan + auto inclusiveScanHost = [&]() { + hostVector.scan(); }; - benchmark.setOperation( "inclusive prefix sum", 2 * datasetSize ); - benchmark.time< Devices::Host >( reset1, "CPU ET", inclusivePrefixSumHost ); + benchmark.setOperation( "inclusive scan", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset1, "CPU ET", inclusiveScanHost ); #ifdef HAVE_CUDA - auto inclusivePrefixSumCuda = [&]() { - deviceVector.prefixSum(); + auto inclusiveScanCuda = [&]() { + deviceVector.scan(); }; - benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusivePrefixSumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusiveScanCuda ); #endif //// - // Exclusive prefix sum - auto exclusivePrefixSumHost = [&]() { - hostVector.template prefixSum< Algorithms::ScanType::Exclusive >(); + // Exclusive scan + auto exclusiveScanHost = [&]() { + hostVector.template scan< Algorithms::ScanType::Exclusive >(); }; - benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); - benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); + benchmark.setOperation( "exclusive scan", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset1, "CPU ET", exclusiveScanHost ); #ifdef HAVE_CUDA - auto exclusivePrefixSumCuda = [&]() { - deviceVector.template prefixSum< Algorithms::ScanType::Exclusive >(); + auto exclusiveScanCuda = [&]() { + deviceVector.template scan< Algorithms::ScanType::Exclusive >(); }; - benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusiveScanCuda ); #endif #ifdef HAVE_CUDA diff --git a/src/TNL/Algorithms/DistributedScan.h b/src/TNL/Algorithms/DistributedScan.h index f294b0cf30..742acd5ed9 100644 --- a/src/TNL/Algorithms/DistributedScan.h +++ b/src/TNL/Algorithms/DistributedScan.h @@ -54,7 +54,7 @@ struct DistributedScan // NOTE: exchanging general data types does not work with MPI CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); - // compute prefix-sum of the per-rank sums + // compute the scan of the per-rank sums Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); // perform second phase: shift by the per-block and per-rank offsets diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h index f1736b3784..db4e46e686 100644 --- a/src/TNL/Containers/DistributedVector.h +++ b/src/TNL/Containers/DistributedVector.h @@ -131,7 +131,7 @@ public: DistributedVector& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index dbe9760f6d..fa49591e8a 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -273,7 +273,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVector< Real, Device, Index, Communicator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h index 47ad788360..70452c50d1 100644 --- a/src/TNL/Containers/DistributedVectorView.h +++ b/src/TNL/Containers/DistributedVectorView.h @@ -134,7 +134,7 @@ public: DistributedVectorView& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 5669a52b68..70f61979fd 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -261,7 +261,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVectorView< Real, Device, Index, Communicator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index c23154e94b..be08266b61 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -244,75 +244,84 @@ public: Vector& operator/=( const VectorExpression& expression ); /** - * \brief Computes prefix sum of the vector elements. + * \brief Computes the scan (prefix sum) of the vector elements. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of the vector elements. + * \brief Computes the segmented scan (prefix sum) of the vector elements. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > - void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes prefix sum of the vector expression. + * \brief Computes the scan (prefix sum) of the vector expression. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * - * \param expression is the vector expression. - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > - void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); + void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of a vector expression. + * \brief Computes the segmented scan (prefix sum) of a vector expression. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param expression is the vector expression. - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > - void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index a5c20d5965..5fdce0d09d 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -146,7 +146,7 @@ template< typename Real, template< Algorithms::ScanType Type > void Vector< Real, Device, Index, Allocator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -161,7 +161,7 @@ template< typename Real, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: -segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -176,9 +176,9 @@ template< typename Real, typename VectorExpression > void Vector< Real, Device, Index, Allocator >:: -prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) +scan( const VectorExpression& expression, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." ); } template< typename Real, @@ -190,9 +190,9 @@ template< typename Real, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: -segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." ); } } // namespace Containers diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 8200b0d39f..1a144ea5cd 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -214,75 +214,84 @@ public: VectorView& operator/=( const VectorExpression& expression ); /** - * \brief Computes prefix sum of the vector view elements. + * \brief Computes the scan (prefix sum) of the vector elements. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector view remain unchanged. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of the vector view elements. + * \brief Computes the segmented scan (prefix sum) of the vector elements. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector view remain unchanged. Whole vector view is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > - void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes prefix sum of the vector expression. + * \brief Computes the scan (prefix sum) of the vector expression. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * - * \param expression is the vector expression. - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > - void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); + void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of a vector expression. + * \brief Computes the segmented scan (prefix sum) of a vector expression. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param expression is the vector expression. - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > - void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 490288e6bd..2c1cd02c81 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -108,7 +108,7 @@ template< typename Real, template< Algorithms::ScanType Type > void VectorView< Real, Device, Index >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -122,7 +122,7 @@ template< typename Real, typename FlagsArray > void VectorView< Real, Device, Index >:: -segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -136,9 +136,9 @@ template< typename Real, typename VectorExpression > void VectorView< Real, Device, Index >:: -prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) +scan( const VectorExpression& expression, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." ); } template< typename Real, @@ -149,9 +149,9 @@ template< typename Real, typename FlagsArray > void VectorView< Real, Device, Index >:: -segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." ); } } // namespace Containers diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 53f61903eb..51646152e8 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -94,7 +94,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); - this->groupPointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->groupPointers.template scan< Algorithms::ScanType::Exclusive >(); // uncomment to perform structure test //DeviceDependentCode::verifyRowPerm( *this, rowLengths ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 9d8fd64560..327d250028 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -88,7 +88,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng rowPtrs.bind( this->rowPointers.getData(), this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); - this->rowPointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->rowPointers.template scan< Algorithms::ScanType::Exclusive >(); this->maxRowLength = max( rowLengths ); /**** diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 89e525e87b..48119c6591 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -232,7 +232,7 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre this->rowPointers.setElement( 0, 0 ); for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) this->setSlice( rowLengths, sliceIndex, elementsToAllocation ); - this->rowPointers.prefixSum(); + this->rowPointers.scan(); } // std::cout << "\ngetRowLength after first if: " << std::endl; diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 00df43cd42..c403fd4c84 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -58,7 +58,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->slicePointers.template scan< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index aa76caf7b5..45e8cdee77 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -79,7 +79,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->slicePointers.template scan< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/UnitTests/Containers/DistributedVectorTest.h b/src/UnitTests/Containers/DistributedVectorTest.h index 69dd543deb..2a1834f318 100644 --- a/src/UnitTests/Containers/DistributedVectorTest.h +++ b/src/UnitTests/Containers/DistributedVectorTest.h @@ -54,7 +54,7 @@ protected: const int nproc = CommunicatorType::GetSize(group); // should be small enough to have fast tests, but large enough to test - // prefix-sum with multiple CUDA grids + // scan with multiple CUDA grids const int globalSize = 10000 * nproc; DistributedVectorTest() @@ -80,7 +80,7 @@ using DistributedVectorTypes = ::testing::Types< TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes ); -TYPED_TEST( DistributedVectorTest, prefixSum ) +TYPED_TEST( DistributedVectorTest, scan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; @@ -97,21 +97,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -119,21 +119,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -147,7 +147,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -155,7 +155,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -163,7 +163,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -172,7 +172,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -180,7 +180,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -188,7 +188,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -199,7 +199,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) } } -TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) +TYPED_TEST( DistributedVectorTest, exclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; @@ -216,21 +216,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -238,21 +238,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -266,7 +266,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -274,7 +274,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -282,7 +282,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -291,7 +291,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -299,7 +299,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -307,7 +307,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) diff --git a/src/UnitTests/Containers/VectorPrefixSumTest.h b/src/UnitTests/Containers/VectorPrefixSumTest.h index 67281dba0e..7f2151c5ef 100644 --- a/src/UnitTests/Containers/VectorPrefixSumTest.h +++ b/src/UnitTests/Containers/VectorPrefixSumTest.h @@ -17,7 +17,7 @@ // and large enough to require multiple CUDA blocks for reduction constexpr int VECTOR_TEST_SIZE = 10000; -TYPED_TEST( VectorTest, prefixSum ) +TYPED_TEST( VectorTest, scan ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; @@ -37,21 +37,21 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -59,21 +59,21 @@ TYPED_TEST( VectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -87,7 +87,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -95,7 +95,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) @@ -103,7 +103,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -112,7 +112,7 @@ TYPED_TEST( VectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -120,7 +120,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) @@ -128,7 +128,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -160,21 +160,21 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -182,21 +182,21 @@ TYPED_TEST( VectorTest, exclusiveScan ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -210,7 +210,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -218,7 +218,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -226,7 +226,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -235,7 +235,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -243,7 +243,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -251,7 +251,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -262,7 +262,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) } } -// TODO: test prefix sum with custom begin and end parameters +// TODO: test scan with custom begin and end parameters template< typename FlagsView > -- GitLab