From 53c48f25e7391132e07bb5570468da5b044e4b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Sun, 14 Mar 2021 14:42:19 +0100 Subject: [PATCH] Changing order of parameters of Algorithms::Reduction::reduce(withArgument) from (reduce,fetch) to (fetch,reduce). --- .../DenseMatrixExample_getConstRow.cpp | 2 +- .../DenseMatrixViewExample_getConstRow.cpp | 2 +- ...MultidiagonalMatrixExample_getConstRow.cpp | 2 +- ...idiagonalMatrixViewExample_getConstRow.cpp | 4 +- .../SparseMatrixExample_getConstRow.cpp | 2 +- .../SparseMatrixViewExample_getConstRow.cpp | 2 +- .../TridiagonalMatrixExample_getConstRow.cpp | 2 +- ...idiagonalMatrixViewExample_getConstRow.cpp | 2 +- ...orithms_and_lambda_functions_reduction.cpp | 2 +- ...ithms_and_lambda_functions_reduction_2.cpp | 2 +- .../ReductionAndScan/ComparisonExample.cpp | 2 +- .../ReductionAndScan/MapReduceExample-1.cpp | 2 +- .../ReductionAndScan/MapReduceExample-2.cpp | 2 +- .../ReductionAndScan/MapReduceExample-3.cpp | 2 +- .../ReductionAndScan/MaximumNormExample.cpp | 2 +- .../ReductionAndScan/ProductExample.cpp | 2 +- .../ReductionWithArgument.cpp | 2 +- .../ReductionAndScan/ScalarProductExample.cpp | 2 +- .../Tutorials/ReductionAndScan/SumExample.cpp | 2 +- .../UpdateAndResidueExample.cpp | 2 +- .../BLAS/CommonVectorOperations.hpp | 34 +-- src/TNL/Algorithms/MemoryOperationsCuda.hpp | 6 +- src/TNL/Algorithms/MemoryOperationsHost.hpp | 6 +- src/TNL/Algorithms/Reduction.h | 166 +++++++-------- src/TNL/Algorithms/Reduction.hpp | 196 +++++++++--------- src/TNL/Containers/ArrayView.hpp | 4 +- src/TNL/Containers/Expressions/Comparison.h | 30 +-- .../DistributedExpressionTemplates.h | 12 +- .../Expressions/ExpressionTemplates.h | 12 +- .../Expressions/VerticalOperations.h | 20 +- src/TNL/Matrices/DenseMatrixView.hpp | 2 +- src/TNL/Matrices/Matrix.hpp | 2 +- src/TNL/Matrices/MatrixView.hpp | 2 +- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 2 +- src/TNL/Matrices/SparseMatrixView.hpp | 4 +- src/TNL/Matrices/TridiagonalMatrixView.hpp | 2 +- .../MeshDetails/layers/EntityTags/Layer.h | 4 +- src/UnitTests/Matrices/DenseMatrixTest.h | 2 +- 38 files changed, 274 insertions(+), 274 deletions(-) diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp index 445ba2d518..c61a1c8221 100644 --- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp @@ -36,7 +36,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." 
<< std::endl; } diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp index 1e139fa4b2..a0b9980242 100644 --- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp @@ -29,7 +29,7 @@ void getRowExample() return row.getElement( rowIdx ); }; - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp index 6d0f7aeb35..b8ebf91817 100644 --- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp @@ -41,7 +41,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp index 65fa867f15..346e331dba 100644 --- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp @@ -13,7 +13,7 @@ void getRowExample() using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >; MatrixType matrix ( matrixSize, // number of matrix columns - diagonalsOffsets, + diagonalsOffsets, { { 0.0, 0.0, 1.0 }, // matrix elements { 0.0, 2.0, 1.0 }, { 3.0, 2.0, 1.0 }, @@ -32,7 +32,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp index 01689a6621..4d3ae4ff51 100644 --- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp @@ -36,7 +36,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." 
<< std::endl; } diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp index d2e4d971ce..2b5f0faed2 100644 --- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp @@ -28,7 +28,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp index e008c03a02..30bf9249ec 100644 --- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp @@ -40,7 +40,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp index 83463d8686..20d55ff121 100644 --- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp @@ -30,7 +30,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, view.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, view.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." 
<< std::endl; } diff --git a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp index 85ba934080..fda9a41b99 100644 --- a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp +++ b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp @@ -6,5 +6,5 @@ void scalarProduct( double* v1, double* v2, double* product, const int size ) } auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - TNL::Algorithms::Reduction< Device >::reduce( 0, size, reduce, fetch, 0.0 ); + TNL::Algorithms::Reduction< Device >::reduce( 0, size, fetch, reduce, 0.0 ); } \ No newline at end of file diff --git a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp index deeb49dd51..ef17140ce0 100644 --- a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp +++ b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp @@ -8,5 +8,5 @@ void scalarProduct( double* u1, double* u2, } auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - TNL::Algorithms::Reduction< Device >::reduce( 0, size, reduce, fetch, 0.0 ); + TNL::Algorithms::Reduction< Device >::reduce( 0, size, fetch, reduce, 0.0 ); } \ No newline at end of file diff --git a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp index b8c73530ce..3ef168a251 100644 --- a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp @@ -22,7 +22,7 @@ bool comparison( const Vector< double, Device >& u, const Vector< double, Device * Reduce performs logical AND on intermediate results obtained by fetch. */ auto reduce = [] __cuda_callable__ ( const bool& a, const bool& b ) { return a && b; }; - return Reduction< Device >::reduce( 0, v_view.getSize(), reduce, fetch, true ); + return Reduction< Device >::reduce( 0, v_view.getSize(), fetch, reduce, true ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp index ddcb5e2f97..eeccc728fb 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp @@ -14,7 +14,7 @@ double mapReduce( Vector< double, Device >& u ) auto fetch = [=] __cuda_callable__ ( int i )->double { return u_view[ i ] > 0 ? 
u_view[ i ] : 0.0; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, u_view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, u_view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp index 64f7be8cae..da7c1c9c6c 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp @@ -16,7 +16,7 @@ double mapReduce( Vector< double, Device >& u ) if( i % 2 == 0 ) return u_view[ i ]; return 0.0; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, u_view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, u_view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp index bfbf63f3b8..5b5f31131c 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp @@ -15,7 +15,7 @@ double mapReduce( Vector< double, Device >& u ) auto fetch = [=] __cuda_callable__ ( int i )->double { return u_view[ 2 * i ]; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, u_view.getSize() / 2, reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, u_view.getSize() / 2, fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp index 7dcd9a92b4..1b31eb5e53 100644 --- a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp @@ -13,7 +13,7 @@ double maximumNorm( const Vector< double, Device >& v ) auto view = v.getConstView(); auto fetch = [=] __cuda_callable__ ( int i ) { return abs( view[ i ] ); }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return max( a, b ); }; - return Reduction< Device >::reduce( 0, view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp index 6f37861dc6..9df9a6e4b5 100644 --- a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp @@ -17,7 +17,7 @@ double product( const Vector< double, Device >& v ) /*** * Since we compute the product of all elements, the reduction must be initialized by 1.0 not by 0.0. 
*/ - return Reduction< Device >::reduce( 0, view.getSize(), reduce, fetch, 1.0 ); + return Reduction< Device >::reduce( 0, view.getSize(), fetch, reduce, 1.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp index 0d9c160208..689d8b599c 100644 --- a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp @@ -22,7 +22,7 @@ maximumNorm( const Vector< double, Device >& v ) else if( a == b && bIdx < aIdx ) aIdx = bIdx; }; - return Reduction< Device >::reduceWithArgument( 0, view.getSize(), reduction, fetch, std::numeric_limits< double >::max() ); + return Reduction< Device >::reduceWithArgument( 0, view.getSize(), fetch, reduction, std::numeric_limits< double >::max() ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp index e830f7884e..5a63b460b8 100644 --- a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp @@ -18,7 +18,7 @@ double scalarProduct( const Vector< double, Device >& u, const Vector< double, D */ auto fetch = [=] __cuda_callable__ ( int i ) { return u_view[ i ] * v_view[ i ]; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, v_view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, v_view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp index 5db872f5e0..90c6f724a7 100644 --- a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp @@ -30,7 +30,7 @@ double sum( const Vector< double, Device >& v ) * lambdas defined above and finally value of idempotent element, zero in this case, which serve for the * reduction initiation. 
*/ - return Reduction< Device >::reduce( 0, view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp index fa2717ac32..8bd08e900d 100644 --- a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp @@ -17,7 +17,7 @@ double updateAndResidue( Vector< double, Device >& u, const Vector< double, Devi u_view[ i ] += tau * add; return add * add; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return sqrt( Reduction< Device >::reduce( 0, u_view.getSize(), reduce, fetch, 0.0 ) ); + return sqrt( Reduction< Device >::reduce( 0, u_view.getSize(), fetch, reduce, 0.0 ) ); } int main( int argc, char* argv[] ) diff --git a/src/Benchmarks/BLAS/CommonVectorOperations.hpp b/src/Benchmarks/BLAS/CommonVectorOperations.hpp index acb96fabbe..d6a459677d 100644 --- a/src/Benchmarks/BLAS/CommonVectorOperations.hpp +++ b/src/Benchmarks/BLAS/CommonVectorOperations.hpp @@ -30,7 +30,7 @@ getVectorMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -47,7 +47,7 @@ getVectorMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -64,7 +64,7 @@ getVectorAbsMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -81,7 +81,7 @@ getVectorAbsMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( 
IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -97,7 +97,7 @@ getVectorL1Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -113,7 +113,7 @@ getVectorL2Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; - return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ) ); } template< typename Device > @@ -136,7 +136,7 @@ getVectorLpNorm( const Vector& v, const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; - return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -155,7 +155,7 @@ getVectorSum( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -175,7 +175,7 @@ getVectorDifferenceMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -195,7 +195,7 @@ getVectorDifferenceMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -215,7 +215,7 @@ getVectorDifferenceAbsMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { 
return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -235,7 +235,7 @@ getVectorDifferenceAbsMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -254,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -276,7 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1, auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; - return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ) ); } template< typename Device > @@ -302,7 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; - return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -321,7 +321,7 @@ getVectorDifferenceSum( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -340,7 +340,7 @@ getScalarProduct( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } } // namespace Benchmarks diff --git 
a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp index 53b60bb392..5351b69625 100644 --- a/src/TNL/Algorithms/MemoryOperationsCuda.hpp +++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp @@ -148,7 +148,7 @@ compare( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; - return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } template< typename Element, @@ -164,7 +164,7 @@ containsValue( const Element* data, TNL_ASSERT_GE( size, (Index) 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, std::logical_or<>{}, fetch, false ); + return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, fetch, std::logical_or<>{}, false ); } template< typename Element, @@ -180,7 +180,7 @@ containsOnlyValue( const Element* data, TNL_ASSERT_GE( size, 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } } // namespace Algorithms diff --git a/src/TNL/Algorithms/MemoryOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp index 090d0bb9ed..92b44f8cf5 100644 --- a/src/TNL/Algorithms/MemoryOperationsHost.hpp +++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp @@ -113,7 +113,7 @@ compare( const DestinationElement* destination, if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto fetch = [destination, source] ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; - return Reduction< Devices::Host >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Host >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } else { // sequential algorithm can return as soon as it finds a mismatch @@ -135,7 +135,7 @@ containsValue( const Element* data, if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto fetch = [=] ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Host >::reduce( ( Index ) 0, size, std::logical_or<>{}, fetch, false ); + return Reduction< Devices::Host >::reduce( ( Index ) 0, size, fetch, std::logical_or<>{}, false ); } else { // sequential algorithm can return as soon as it finds a match @@ -157,7 +157,7 @@ containsOnlyValue( const Element* data, if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto fetch = [data, value] ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Host >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Host >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } else { // sequential algorithm can return as soon as it finds a mismatch diff --git a/src/TNL/Algorithms/Reduction.h b/src/TNL/Algorithms/Reduction.h index e36a706c13..d928ec6875 100644 --- a/src/TNL/Algorithms/Reduction.h +++ b/src/TNL/Algorithms/Reduction.h @@ -45,27 +45,27 @@ struct Reduction< Devices::Sequential > * * \tparam Index is a type for indexing. 
* \tparam Result is a type of the reduction result.
- * \tparam ReductionOperation is a lambda function performing the reduction.
- * \tparam DataFetcher is a lambda function for fetching the input data.
+ * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction.
*
* \param begin defines range [begin, end) of indexes which will be used for the reduction.
* \param end defines range [begin, end) of indexes which will be used for the reduction.
- * \param reduction is a lambda function defining the reduction operation.
- * \param dataFetcher is a lambda function fetching the input data.
+ * \param fetch is a lambda function fetching the input data.
+ * \param reduce is a lambda function defining the reduction operation.
* \param zero is the identity element for the reduction operation, i.e. an element which
* does not change the result of the reduction.
* \return result of the reduction
*
- * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
+ * The `fetch` lambda function takes one argument which is the index of the element to be fetched:
*
* ```
- * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
+ * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
- * The reduction lambda function takes two variables which are supposed to be reduced:
+ * The `reduce` lambda function takes two variables which are supposed to be reduced:
*
* ```
- * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
+ * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/SumExample.cpp
*
* \par Output
*
* \include SumExample.out
*/
template< typename Index,
typename Result,
- typename ReductionOperation,
- typename DataFetcher >
+ typename Fetch,
+ typename Reduce >
static constexpr Result
reduce( const Index begin,
const Index end,
- const ReductionOperation& reduction,
- DataFetcher& dataFetcher,
+ Fetch&& fetch,
+ Reduce&& reduce,
const Result& zero );

/**
* \brief Computes reduction sequentially and returns the position of an element of interest.
*
* For example in case of computing minimal or maximal element in array/vector,
* the position of the element having given value can be obtained. The use of this method
* is, however, more flexible.
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
- * \tparam ReductionOperation is a lambda function performing the reduction.
- * \tparam DataFetcher is a lambda function for fetching the input data.
+ * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction.
*
* \param begin defines range [begin, end) of indexes which will be used for the reduction.
* \param end defines range [begin, end) of indexes which will be used for the reduction.
- * \param reduction is a lambda function defining the reduction operation and managing the elements positions.
- * \param dataFetcher is a lambda function fetching the input data.
+ * \param fetch is a lambda function fetching the input data.
+ * \param reduce is a lambda function defining the reduction operation and managing the elements positions.
* \param zero is the identity element for the reduction operation, i.e. an element which
* does not change the result of the reduction.
* \return result of the reduction in the form of a std::pair< Result, Index > structure: `pair.first`
* is the reduction result and `pair.second` is the position of the element of interest.
*
- * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
+ * The `fetch` lambda function takes one argument which is the index of the element to be fetched:
*
* ```
- * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
+ * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
- * The reduction lambda function takes two variables which are supposed to be reduced:
+ * The `reduce` lambda function takes two variables which are supposed to be reduced:
*
* ```
- * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... };
+ * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/ReductionWithArgument.cpp
*
* \par Output
*
* \include ReductionWithArgument.out
*/
template< typename Index,
typename Result,
- typename ReductionOperation,
- typename DataFetcher >
+ typename Fetch,
+ typename Reduce >
static constexpr std::pair< Result, Index >
reduceWithArgument( const Index begin,
const Index end,
- const ReductionOperation& reduction,
- DataFetcher& dataFetcher,
+ Fetch&& fetch,
+ Reduce&& reduce,
const Result& zero );
};

@@ -148,27 +148,27 @@ struct Reduction< Devices::Host >
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
- * \tparam ReductionOperation is a lambda function performing the reduction.
- * \tparam DataFetcher is a lambda function for fetching the input data.
+ * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction.
*
* \param begin defines range [begin, end) of indexes which will be used for the reduction.
* \param end defines range [begin, end) of indexes which will be used for the reduction.
- * \param reduction is a lambda function defining the reduction operation.
- * \param dataFetcher is a lambda function fetching the input data.
+ * \param fetch is a lambda function fetching the input data.
+ * \param reduce is a lambda function defining the reduction operation.
* \param zero is the identity element for the reduction operation, i.e. an element which
* does not change the result of the reduction.
* \return result of the reduction
*
- * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
+ * The `fetch` lambda function takes one argument which is the index of the element to be fetched:
*
* ```
- * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
+ * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
- * The reduction lambda function takes two variables which are supposed to be reduced:
+ * The `reduce` lambda function takes two variables which are supposed to be reduced:
*
* ```
- * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
+ * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/SumExample.cpp
*
* \par Output
*
* \include SumExample.out
*/
template< typename Index,
typename Result,
- typename ReductionOperation,
- typename DataFetcher >
+ typename Fetch,
+ typename Reduce >
static Result
reduce( const Index begin,
const Index end,
- const ReductionOperation& reduction,
- DataFetcher& dataFetcher,
+ Fetch&& fetch,
+ Reduce&& reduce,
const Result& zero );

/**
* \brief Computes reduction on CPU and returns the position of an element of interest.
- *
- * For example in case of computing minimal or maximal element in array/vector,
+ *
+ * For example in case of computing minimal or maximal element in array/vector,
* the position of the element having given value can be obtained. The use of this method
* is, however, more flexible.
- *
+ *
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
- * \tparam ReductionOperation is a lambda function performing the reduction.
- * \tparam DataFetcher is a lambda function for fetching the input data.
+ * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction.
- *
+ *
* \param begin defines range [begin, end) of indexes which will be used for the reduction.
* \param end defines range [begin, end) of indexes which will be used for the reduction.
- * \param reduction is a lambda function defining the reduction operation and managing the elements positions.
- * \param dataFetcher is a lambda function fetching the input data.
+ * \param fetch is a lambda function fetching the input data.
+ * \param reduce is a lambda function defining the reduction operation and managing the elements positions.
* \param zero is the identity element for the reduction operation, i.e. an element which
* does not change the result of the reduction.
* \return result of the reduction in the form of a std::pair< Result, Index > structure: `pair.first`
* is the reduction result and `pair.second` is the position of the element of interest.
- *
- * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
- *
+ *
+ * The `fetch` lambda function takes one argument which is the index of the element to be fetched:
+ *
* ```
- * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
+ * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
- *
- * The reduction lambda function takes two variables which are supposed to be reduced:
- *
+ *
+ * The `reduce` lambda function takes two variables which are supposed to be reduced:
+ *
* ```
- * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... };
+ * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... };
* ```
- *
+ *
* \par Example
- *
+ *
* \include ReductionAndScan/ReductionWithArgument.cpp
- *
+ *
* \par Output
- *
+ *
* \include ReductionWithArgument.out
*/
template< typename Index,
typename Result,
- typename ReductionOperation,
- typename DataFetcher >
+ typename Fetch,
+ typename Reduce >
static std::pair< Result, Index >
reduceWithArgument( const Index begin,
const Index end,
- const ReductionOperation& reduction,
- DataFetcher& dataFetcher,
+ Fetch&& fetch,
+ Reduce&& reduce,
const Result& zero );
};

@@ -251,27 +251,27 @@ struct Reduction< Devices::Cuda >
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
- * \tparam ReductionOperation is a lambda function performing the reduction.
- * \tparam DataFetcher is a lambda function for fetching the input data.
+ * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction.
*
* \param begin defines range [begin, end) of indexes which will be used for the reduction.
* \param end defines range [begin, end) of indexes which will be used for the reduction.
- * \param reduction is a lambda function defining the reduction operation.
- * \param dataFetcher is a lambda function fetching the input data.
+ * \param fetch is a lambda function fetching the input data.
+ * \param reduce is a lambda function defining the reduction operation.
* \param zero is the identity element for the reduction operation, i.e. an element which
* does not change the result of the reduction.
* \return result of the reduction
*
- * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
+ * The `fetch` lambda function takes one argument which is the index of the element to be fetched:
*
* ```
- * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
+ * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
- * The reduction lambda function takes two variables which are supposed to be reduced:
+ * The `reduce` lambda function takes two variables which are supposed to be reduced:
*
* ```
- * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
+ * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/SumExample.cpp
*
* \par Output
*
* \include SumExample.out
*/
template< typename Index,
typename Result,
- typename ReductionOperation,
- typename DataFetcher >
+ typename Fetch,
+ typename Reduce >
static Result
reduce( const Index begin,
const Index end,
- const ReductionOperation& reduction,
- DataFetcher& dataFetcher,
+ Fetch&& fetch,
+ Reduce&& reduce,
const Result& zero );

/**
* \brief Computes reduction on GPU and returns the position of an element of interest.
*
* For example in case of computing minimal or maximal element in array/vector,
* the position of the element having given value can be obtained. The use of this method
* is, however, more flexible.
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
- * \tparam ReductionOperation is a lambda function performing the reduction.
- * \tparam DataFetcher is a lambda function for fetching the input data.
+ * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction.
*
* \param begin defines range [begin, end) of indexes which will be used for the reduction.
* \param end defines range [begin, end) of indexes which will be used for the reduction.
- * \param reduction is a lambda function defining the reduction operation and managing the elements positions.
- * \param dataFetcher is a lambda function fetching the input data.
+ * \param fetch is a lambda function fetching the input data.
+ * \param reduce is a lambda function defining the reduction operation and managing the elements positions.
* \param zero is the identity element for the reduction operation, i.e. an element which
* does not change the result of the reduction.
* \return result of the reduction in the form of a std::pair< Result, Index > structure: `pair.first`
* is the reduction result and `pair.second` is the position of the element of interest.
*
- * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
+ * The `fetch` lambda function takes one argument which is the index of the element to be fetched:
*
* ```
- * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
+ * auto fetch = [=] __cuda_callable__ ( Index i ) { return ...
}; * ``` * - * The reduction lambda function takes two variables which are supposed to be reduced: + * The `reduce` lambda function takes two variables which are supposed to be reduced: * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; * ``` * * \par Example @@ -336,13 +336,13 @@ struct Reduction< Devices::Cuda > */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static std::pair< Result, Index > reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); }; diff --git a/src/TNL/Algorithms/Reduction.hpp b/src/TNL/Algorithms/Reduction.hpp index 70e725af6b..7873f9c3c4 100644 --- a/src/TNL/Algorithms/Reduction.hpp +++ b/src/TNL/Algorithms/Reduction.hpp @@ -37,14 +37,14 @@ static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//25 template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > constexpr Result Reduction< Devices::Sequential >:: reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { constexpr int block_size = 128; @@ -55,45 +55,45 @@ reduce( const Index begin, // initialize array for unrolled results Result r[ 4 ] = { zero, zero, zero, zero }; - // main reduction (explicitly unrolled loop) + // main reduce (explicitly unrolled loop) for( Index b = 0; b < blocks; b++ ) { const Index offset = begin + b * block_size; for( int i = 0; i < block_size; i += 4 ) { - r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); - r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); - r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); - r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( offset + i ) ); + r[ 1 ] = reduce( r[ 1 ], fetch( offset + i + 1 ) ); + r[ 2 ] = reduce( r[ 2 ], fetch( offset + i + 2 ) ); + r[ 3 ] = reduce( r[ 3 ], fetch( offset + i + 3 ) ); } } - // reduction of the last, incomplete block (not unrolled) + // reduce of the last, incomplete block (not unrolled) for( Index i = begin + blocks * block_size; i < end; i++ ) - r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( i ) ); - // reduction of unrolled results - r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); - r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); - r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); + // reduce of unrolled results + r[ 0 ] = reduce( r[ 0 ], r[ 2 ] ); + r[ 1 ] = reduce( r[ 1 ], r[ 3 ] ); + r[ 0 ] = reduce( r[ 0 ], r[ 1 ] ); return r[ 0 ]; } else { Result result = zero; for( Index i = begin; i < end; i++ ) - result = reduction( result, dataFetcher( i ) ); + result = reduce( result, fetch( i ) ); return result; } } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > constexpr std::pair< Result, Index > Reduction< Devices::Sequential >:: reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { constexpr int 
block_size = 128; @@ -106,7 +106,7 @@ reduceWithArgument( const Index begin, Result r[ 4 ] = { zero, zero, zero, zero }; bool initialized( false ); - // main reduction (explicitly unrolled loop) + // main reduce (explicitly unrolled loop) for( Index b = 0; b < blocks; b++ ) { const Index offset = begin + b * block_size; for( int i = 0; i < block_size; i += 4 ) { @@ -116,48 +116,48 @@ reduceWithArgument( const Index begin, arg[ 1 ] = offset + i + 1; arg[ 2 ] = offset + i + 2; arg[ 3 ] = offset + i + 3; - r[ 0 ] = dataFetcher( offset + i ); - r[ 1 ] = dataFetcher( offset + i + 1 ); - r[ 2 ] = dataFetcher( offset + i + 2 ); - r[ 3 ] = dataFetcher( offset + i + 3 ); + r[ 0 ] = fetch( offset + i ); + r[ 1 ] = fetch( offset + i + 1 ); + r[ 2 ] = fetch( offset + i + 2 ); + r[ 3 ] = fetch( offset + i + 3 ); initialized = true; continue; } - reduction( r[ 0 ], dataFetcher( offset + i ), arg[ 0 ], offset + i ); - reduction( r[ 1 ], dataFetcher( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); - reduction( r[ 2 ], dataFetcher( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); - reduction( r[ 3 ], dataFetcher( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); + reduce( r[ 0 ], fetch( offset + i ), arg[ 0 ], offset + i ); + reduce( r[ 1 ], fetch( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); + reduce( r[ 2 ], fetch( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); + reduce( r[ 3 ], fetch( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); } } - // reduction of the last, incomplete block (not unrolled) + // reduce of the last, incomplete block (not unrolled) for( Index i = begin + blocks * block_size; i < size; i++ ) - reduction( r[ 0 ], dataFetcher( i ), arg[ 0 ], i ); + reduce( r[ 0 ], fetch( i ), arg[ 0 ], i ); - // reduction of unrolled results - reduction( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); - reduction( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); - reduction( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); + // reduce of unrolled results + reduce( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); + reduce( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); + reduce( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); return std::make_pair( r[ 0 ], arg[ 0 ] ); } else { - std::pair< Result, Index > result( dataFetcher( begin ), begin ); + std::pair< Result, Index > result( fetch( begin ), begin ); for( Index i = begin + 1; i < end; i++ ) - reduction( result.first, dataFetcher( i ), result.second, i ); + reduce( result.first, fetch( i ), result.second, i ); return result; } } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > Result Reduction< Devices::Host >:: reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { #ifdef HAVE_OPENMP @@ -178,10 +178,10 @@ reduce( const Index begin, for( Index b = 0; b < blocks; b++ ) { const Index offset = begin + b * block_size; for( int i = 0; i < block_size; i += 4 ) { - r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); - r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); - r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); - r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( offset + i ) ); + r[ 1 ] = reduce( r[ 1 ], fetch( offset + i + 1 ) ); + r[ 2 ] = reduce( r[ 2 ], fetch( offset + i + 2 ) ); + r[ 3 ] = reduce( r[ 3 ], fetch( offset + i + 3 ) ); } } @@ -189,37 +189,37 @@ reduce( const Index begin, #pragma omp single nowait { for( Index i = begin + blocks * 
block_size; i < end; i++ ) - r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( i ) ); } - // local reduction of unrolled results - r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); - r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); - r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); + // local reduce of unrolled results + r[ 0 ] = reduce( r[ 0 ], r[ 2 ] ); + r[ 1 ] = reduce( r[ 1 ], r[ 3 ] ); + r[ 0 ] = reduce( r[ 0 ], r[ 1 ] ); - // inter-thread reduction of local results + // inter-thread reduce of local results #pragma omp critical { - result = reduction( result, r[ 0 ] ); + result = reduce( result, r[ 0 ] ); } } return result; } else #endif - return Reduction< Devices::Sequential >::reduce( begin, end, reduction, dataFetcher, zero ); + return Reduction< Devices::Sequential >::reduce( begin, end, fetch, reduce, zero ); } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > std::pair< Result, Index > Reduction< Devices::Host >:: reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { #ifdef HAVE_OPENMP @@ -247,17 +247,17 @@ reduceWithArgument( const Index begin, arg[ 1 ] = offset + i + 1; arg[ 2 ] = offset + i + 2; arg[ 3 ] = offset + i + 3; - r[ 0 ] = dataFetcher( offset + i ); - r[ 1 ] = dataFetcher( offset + i + 1 ); - r[ 2 ] = dataFetcher( offset + i + 2 ); - r[ 3 ] = dataFetcher( offset + i + 3 ); + r[ 0 ] = fetch( offset + i ); + r[ 1 ] = fetch( offset + i + 1 ); + r[ 2 ] = fetch( offset + i + 2 ); + r[ 3 ] = fetch( offset + i + 3 ); initialized = true; continue; } - reduction( r[ 0 ], dataFetcher( offset + i ), arg[ 0 ], offset + i ); - reduction( r[ 1 ], dataFetcher( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); - reduction( r[ 2 ], dataFetcher( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); - reduction( r[ 3 ], dataFetcher( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); + reduce( r[ 0 ], fetch( offset + i ), arg[ 0 ], offset + i ); + reduce( r[ 1 ], fetch( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); + reduce( r[ 2 ], fetch( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); + reduce( r[ 3 ], fetch( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); } } @@ -265,44 +265,44 @@ reduceWithArgument( const Index begin, #pragma omp single nowait { for( Index i = begin + blocks * block_size; i < end; i++ ) - reduction( r[ 0 ], dataFetcher( i ), arg[ 0 ], i ); + reduce( r[ 0 ], fetch( i ), arg[ 0 ], i ); } - // local reduction of unrolled results - reduction( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); - reduction( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); - reduction( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); + // local reduce of unrolled results + reduce( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); + reduce( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); + reduce( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); - // inter-thread reduction of local results + // inter-thread reduce of local results #pragma omp critical { if( result.second == -1 ) result.second = arg[ 0 ]; - reduction( result.first, r[ 0 ], result.second, arg[ 0 ] ); + reduce( result.first, r[ 0 ], result.second, arg[ 0 ] ); } } return result; } else #endif - return Reduction< Devices::Sequential >::reduceWithArgument( begin, end, reduction, dataFetcher, zero ); + return Reduction< Devices::Sequential >::reduceWithArgument( begin, end, fetch, reduce, zero ); } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename 
Fetch, + typename Reduce > Result Reduction< Devices::Cuda >:: reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { // Only fundamental and pointer types can be safely reduced on host. Complex // objects stored on the device might contain pointers into the device memory, - // in which case reduction on host might fail. + // in which case reduce on host might fail. constexpr bool can_reduce_later_on_host = std::is_fundamental< Result >::value || std::is_pointer< Result >::value; #ifdef CUDA_REDUCTION_PROFILING @@ -313,11 +313,11 @@ reduce( const Index begin, CudaReductionKernelLauncher< Index, Result > reductionLauncher( begin, end ); - // start the reduction on the GPU + // start the reduce on the GPU Result* deviceAux1( 0 ); const int reducedSize = reductionLauncher.start( - reduction, - dataFetcher, + reduce, + fetch, zero, deviceAux1 ); @@ -353,9 +353,9 @@ reduce( const Index begin, timer.start(); #endif - // finish the reduction on the host + // finish the reduce on the host auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; - const Result result = Reduction< Devices::Sequential >::reduce( 0, reducedSize, reduction, fetch, zero ); + const Result result = Reduction< Devices::Sequential >::reduce( 0, reducedSize, fetch, reduce, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -364,8 +364,8 @@ reduce( const Index begin, return result; } else { - // data can't be safely reduced on host, so continue with the reduction on the GPU - auto result = reductionLauncher.finish( reduction, zero ); + // data can't be safely reduced on host, so continue with the reduce on the GPU + auto result = reductionLauncher.finish( reduce, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -380,19 +380,19 @@ reduce( const Index begin, template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > std::pair< Result, Index > Reduction< Devices::Cuda >:: reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { // Only fundamental and pointer types can be safely reduced on host. Complex // objects stored on the device might contain pointers into the device memory, - // in which case reduction on host might fail. + // in which case reduce on host might fail. 
constexpr bool can_reduce_later_on_host = std::is_fundamental< Result >::value || std::is_pointer< Result >::value; #ifdef CUDA_REDUCTION_PROFILING @@ -403,12 +403,12 @@ reduceWithArgument( const Index begin, CudaReductionKernelLauncher< Index, Result > reductionLauncher( begin, end ); - // start the reduction on the GPU + // launch the reduction on the GPU Result* deviceAux1( nullptr ); Index* deviceIndexes( nullptr ); const int reducedSize = reductionLauncher.startWithArgument( - reduction, - dataFetcher, + reduce, + fetch, zero, deviceAux1, deviceIndexes ); @@ -460,11 +460,11 @@ reduceWithArgument( const Index begin, timer.start(); #endif - // finish the reduction on the host + // complete the reduction on the host // auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; -// const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero ); +// const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, fetch, reduce, zero ); for( Index i = 1; i < reducedSize; i++ ) - reduction( resultArray[ 0 ], resultArray[ i ], indexArray[ 0 ], indexArray[ i ] ); + reduce( resultArray[ 0 ], resultArray[ i ], indexArray[ 0 ], indexArray[ i ] ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -473,8 +473,8 @@ reduceWithArgument( const Index begin, return std::make_pair( resultArray[ 0 ], indexArray[ 0 ] ); } else { - // data can't be safely reduced on host, so continue with the reduction on the GPU - auto result = reductionLauncher.finishWithArgument( reduction, zero ); + // the data can't be safely reduced on the host, so finish the reduction on the GPU + auto result = reductionLauncher.finishWithArgument( reduce, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 2c0d3d631f..9143dea1ac 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -380,7 +380,7 @@ reduceElements( Index begin, Index end, Fetch&& fetch, Reduce&& reduce, const Re ValueType* d = this->getData(); auto main_fetch = [=] __cuda_callable__ ( IndexType i ) mutable -> Result { return fetch( i, d[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( begin, end, reduce, main_fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( begin, end, main_fetch, reduce, zero ); } template< typename Value, @@ -397,7 +397,7 @@ reduceElements( Index begin, Index end, Fetch&& fetch, Reduce&& reduce, const Re const ValueType* d = this->getData(); auto main_fetch = [=] __cuda_callable__ ( IndexType i ) mutable -> Result { return fetch( i, d[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( begin, end, reduce, main_fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( begin, end, main_fetch, reduce, zero ); } template< typename Value, diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h index 33986e1edd..738409cc40 100644 --- a/src/TNL/Containers/Expressions/Comparison.h +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -68,7 +68,7 @@ struct VectorComparison< T1, T2, false > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] == view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, 
a.getSize(), fetch, std::logical_and<>{}, true ); } }; @@ -100,7 +100,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] > view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool GE( const T1& a, const T2& b ) @@ -115,7 +115,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] >= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LT( const T1& a, const T2& b ) @@ -130,7 +130,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] < view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LE( const T1& a, const T2& b ) @@ -145,7 +145,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] <= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } }; @@ -162,7 +162,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a == view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool NE( const T1& a, const T2& b ) @@ -177,7 +177,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a > view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool GE( const T1& a, const T2& b ) @@ -187,7 +187,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a >= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( 
IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool LT( const T1& a, const T2& b ) @@ -197,7 +197,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a < view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool LE( const T1& a, const T2& b ) @@ -207,7 +207,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a <= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } }; @@ -224,7 +224,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] == b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool NE( const T1& a, const T2& b ) @@ -239,7 +239,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] > b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool GE( const T1& a, const T2& b ) @@ -249,7 +249,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] >= b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LT( const T1& a, const T2& b ) @@ -259,7 +259,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] < b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LE( const T1& a, const T2& b ) @@ -269,7 +269,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] <= b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), 
std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } }; diff --git a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h index 5f67084fd8..6959a95fed 100644 --- a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h @@ -1073,7 +1073,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1092,7 +1092,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } //// @@ -1118,7 +1118,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1141,7 +1141,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } //// @@ -1167,7 +1167,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1190,7 +1190,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 7baf37572e..93d7e802d3 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -896,7 +896,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -915,7 +915,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Algorithms::Reduction< DeviceType 
>::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } //// @@ -941,7 +941,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -964,7 +964,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } //// @@ -990,7 +990,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1013,7 +1013,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 8de97f06cb..6e5f5624b2 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -43,7 +43,7 @@ auto ExpressionMin( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -65,7 +65,7 @@ auto ExpressionArgMin( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -85,7 +85,7 @@ auto ExpressionMax( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, 
expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Expression > @@ -107,7 +107,7 @@ auto ExpressionArgMax( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Expression > @@ -119,7 +119,7 @@ auto ExpressionSum( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::plus<>{}, fetch, (ResultType) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::plus<>{}, (ResultType) 0 ); } template< typename Expression > @@ -131,7 +131,7 @@ auto ExpressionProduct( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::multiplies<>{}, fetch, (ResultType) 1 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::multiplies<>{}, (ResultType) 1 ); } template< typename Expression > @@ -145,7 +145,7 @@ auto ExpressionLogicalAnd( const Expression& expression ) auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::logical_and<>{}, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::logical_and<>{}, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -157,7 +157,7 @@ auto ExpressionLogicalOr( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::logical_or<>{}, fetch, (ResultType) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::logical_or<>{}, (ResultType) 0 ); } template< typename Expression > @@ -171,7 +171,7 @@ auto ExpressionBinaryAnd( const Expression& expression ) auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::bit_and<>{}, fetch, std::numeric_limits< ResultType 
>::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::bit_and<>{}, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -183,7 +183,7 @@ auto ExpressionBinaryOr( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::bit_or<>{}, fetch, (ResultType) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::bit_or<>{}, (ResultType) 0 ); } } // namespace Expressions diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index f2532a47bb..97e82af0e3 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -156,7 +156,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 5122879350..57c79cd769 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -85,7 +85,7 @@ Index Matrix< Real, Device, Index, RealAllocator >::getNonzeroElementsCount() co auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 8c20d07d1a..83563a8257 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -63,7 +63,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 844e1721f1..44c43da7f9 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -173,7 +173,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index c26b3ee05f..e7842a50a5 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp 
@@ -191,7 +191,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( columns_view[ i ] != paddingIndex ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->columnIndexes.getSize(), fetch, std::plus<>{}, 0 ); } else { @@ -799,7 +799,7 @@ operator==( const Matrix& m ) const { return view1.getRow( i ) == view2.getRow( i ); }; - return Algorithms::Reduction< DeviceType >::reduce( 0, this->getRows(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( 0, this->getRows(), fetch, std::logical_and<>{}, true ); } template< typename Real, diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index c0b6547fba..c125ffe222 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -133,7 +133,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h b/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h index dab80fc7e3..dc0c767b80 100644 --- a/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h +++ b/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h @@ -139,8 +139,8 @@ public: { return bool(tags_view[ entityIndex ] & EntityTags::GhostEntity); }; - const GlobalIndexType boundaryEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), std::plus<>{}, is_boundary, (GlobalIndexType) 0 ); - const GlobalIndexType ghostEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), std::plus<>{}, is_ghost, (GlobalIndexType) 0 ); + const GlobalIndexType boundaryEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), is_boundary, std::plus<>{}, (GlobalIndexType) 0 ); + const GlobalIndexType ghostEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), is_ghost, std::plus<>{}, (GlobalIndexType) 0 ); interiorIndices.setSize( tags.getSize() - boundaryEntities ); boundaryIndices.setSize( boundaryEntities ); diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 3ac1f38ff1..9cd7c3db05 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -448,7 +448,7 @@ void test_SetElement() auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return ( v_view[ i ] == m_view.getElement( i, i ) ); }; - EXPECT_TRUE( TNL::Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, m.getRows(), std::logical_and<>{}, fetch, true ) ); + EXPECT_TRUE( TNL::Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, m.getRows(), fetch, std::logical_and<>{}, true ) ); } -- GitLab
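For reference, the new calling convention puts the fetch lambda before the reduction functor at every call site. The following minimal sketch illustrates the reordered signature on a vector sum; the sumOfElements helper is assumed for demonstration only and is not part of this patch:

   #include <functional>
   #include <TNL/Algorithms/Reduction.h>
   #include <TNL/Containers/Vector.h>

   template< typename Device >
   double sumOfElements( const TNL::Containers::Vector< double, Device >& v )
   {
      const auto view = v.getConstView();
      // fetch reads the i-th element of the input vector
      auto fetch = [=] __cuda_callable__ ( int i ) -> double { return view[ i ]; };
      // note the new parameter order: reduce( begin, end, fetch, reduce, zero )
      return TNL::Algorithms::Reduction< Device >::reduce( 0, v.getSize(), fetch, std::plus<>{}, 0.0 );
   }

The same (fetch, reduce) order applies to reduceWithArgument, which returns a std::pair of the reduced value and its index.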