diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp index 445ba2d518fd9b53cc6ea405fac23397426ab6f4..c61a1c8221a39073099300ddd569b034104b52f9 100644 --- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp @@ -36,7 +36,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp index 1e139fa4b2b5281365b44a6e8ee8ba24fc5d39ec..a0b9980242fe33c0c4a76e2b6f8dc549b85fa293 100644 --- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp @@ -29,7 +29,7 @@ void getRowExample() return row.getElement( rowIdx ); }; - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." 
<< std::endl; } diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp index 6d0f7aeb352dc89691b5dcaeae15d0d84b5f0385..b8ebf918175309adb754d2db35ade6d81e85bb2d 100644 --- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp @@ -41,7 +41,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp index 65fa867f152eaac259c1dfd219f9a3380e6b6a6c..346e331dba3284e798255d0b12e4d50aae8e2212 100644 --- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp @@ -13,7 +13,7 @@ void getRowExample() using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >; MatrixType matrix ( matrixSize, // number of matrix columns - diagonalsOffsets, + diagonalsOffsets, { { 0.0, 0.0, 1.0 }, // matrix elements { 0.0, 2.0, 1.0 }, { 3.0, 2.0, 1.0 }, @@ -32,7 +32,7 @@ void getRowExample() /*** * Compute the matrix trace. 
*/ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp index 01689a6621eea1cd82ad5a6c33668ec87face1e6..4d3ae4ff518201ae4eee03a7ccac69ff6a16c423 100644 --- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp @@ -36,7 +36,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp index d2e4d971ce09f1b09357da64db335b565600d8b9..2b5f0faed2f8b81107f1a048fb053248b5f23480 100644 --- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp @@ -28,7 +28,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix trace is " << trace << "." 
<< std::endl; } diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp index e008c03a02cea769041bfffe99dd9b3b9c4b6fd4..30bf9249eccc5149db46af640f8ecfb58bdb04fc 100644 --- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp @@ -40,7 +40,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." << std::endl; } diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp index 83463d86865f8d4901fc292a711f6ee75ff9bcc5..20d55ff1216e20a5c943d72919aa13e51e353240 100644 --- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp @@ -30,7 +30,7 @@ void getRowExample() /*** * Compute the matrix trace. */ - int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, view.getRows(), std::plus<>{}, fetch, 0 ); + int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, view.getRows(), fetch, std::plus<>{}, 0 ); std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; std::cout << "Matrix trace is: " << trace << "." 
<< std::endl; } diff --git a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp index 85ba93408035aa72b5fa31411b38f53d276ed122..fda9a41b995585d38b2a0067c1de3b3136578136 100644 --- a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp +++ b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction.cpp @@ -6,5 +6,5 @@ void scalarProduct( double* v1, double* v2, double* product, const int size ) } auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - TNL::Algorithms::Reduction< Device >::reduce( 0, size, reduce, fetch, 0.0 ); + TNL::Algorithms::Reduction< Device >::reduce( 0, size, fetch, reduce, 0.0 ); } \ No newline at end of file diff --git a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp index deeb49dd5161b90a65adf00164340635f035bfd9..ef17140ce0acdaf39ef772481a9b4728d638127e 100644 --- a/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp +++ b/Documentation/Tutorials/GeneralConcepts/snippet_algorithms_and_lambda_functions_reduction_2.cpp @@ -8,5 +8,5 @@ void scalarProduct( double* u1, double* u2, } auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - TNL::Algorithms::Reduction< Device >::reduce( 0, size, reduce, fetch, 0.0 ); + TNL::Algorithms::Reduction< Device >::reduce( 0, size, fetch, reduce, 0.0 ); } \ No newline at end of file diff --git a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp index b8c73530ce035c8eb4925740d3f5cfe49cffb19d..3ef168a2514120adeed63b8eadd5dbe4fb5e4176 100644 --- 
a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp @@ -22,7 +22,7 @@ bool comparison( const Vector< double, Device >& u, const Vector< double, Device * Reduce performs logical AND on intermediate results obtained by fetch. */ auto reduce = [] __cuda_callable__ ( const bool& a, const bool& b ) { return a && b; }; - return Reduction< Device >::reduce( 0, v_view.getSize(), reduce, fetch, true ); + return Reduction< Device >::reduce( 0, v_view.getSize(), fetch, reduce, true ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp index ddcb5e2f97124c18f55c888073577eafb2efce1c..eeccc728fb4ea23d1e3a95f22f76c70f1773fddb 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp @@ -14,7 +14,7 @@ double mapReduce( Vector< double, Device >& u ) auto fetch = [=] __cuda_callable__ ( int i )->double { return u_view[ i ] > 0 ? 
u_view[ i ] : 0.0; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, u_view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, u_view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp index 64f7be8cae339e362c746be9556a5f2e34956e33..da7c1c9c6cc8d690a8fec45ad43f54a51cbeab3b 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp @@ -16,7 +16,7 @@ double mapReduce( Vector< double, Device >& u ) if( i % 2 == 0 ) return u_view[ i ]; return 0.0; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, u_view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, u_view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp index bfbf63f3b83158940d93f7e619858521c2f3942d..5b5f31131cac0c90dcaaa783c80acd51018e711c 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp @@ -15,7 +15,7 @@ double mapReduce( Vector< double, Device >& u ) auto fetch = [=] __cuda_callable__ ( int i )->double { return u_view[ 2 * i ]; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, u_view.getSize() / 2, reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, u_view.getSize() / 2, fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git 
a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp index 7dcd9a92b4e161a54ea414ac6187c43acfd2b27f..1b31eb5e5395fe8c5d4f4387ccb7b38c74d40bb2 100644 --- a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp @@ -13,7 +13,7 @@ double maximumNorm( const Vector< double, Device >& v ) auto view = v.getConstView(); auto fetch = [=] __cuda_callable__ ( int i ) { return abs( view[ i ] ); }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return max( a, b ); }; - return Reduction< Device >::reduce( 0, view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp index 6f37861dc6ae3a91962dc497ff848c7c960f1b9b..9df9a6e4b533d9b1669d80802d3eb6a38944d274 100644 --- a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp @@ -17,7 +17,7 @@ double product( const Vector< double, Device >& v ) /*** * Since we compute the product of all elements, the reduction must be initialized by 1.0 not by 0.0. 
*/ - return Reduction< Device >::reduce( 0, view.getSize(), reduce, fetch, 1.0 ); + return Reduction< Device >::reduce( 0, view.getSize(), fetch, reduce, 1.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp index 0d9c160208a74f5ac523aa4211d95c678efd25b0..689d8b599c15a011d64624b2688004c480aa1e72 100644 --- a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp @@ -22,7 +22,7 @@ maximumNorm( const Vector< double, Device >& v ) else if( a == b && bIdx < aIdx ) aIdx = bIdx; }; - return Reduction< Device >::reduceWithArgument( 0, view.getSize(), reduction, fetch, std::numeric_limits< double >::max() ); + return Reduction< Device >::reduceWithArgument( 0, view.getSize(), fetch, reduction, std::numeric_limits< double >::max() ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp index e830f7884eb1359a500a5433f34786d0bb6edb1c..5a63b460b87cced54cfad32adadd1d7707749fa7 100644 --- a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp @@ -18,7 +18,7 @@ double scalarProduct( const Vector< double, Device >& u, const Vector< double, D */ auto fetch = [=] __cuda_callable__ ( int i ) { return u_view[ i ] * v_view[ i ]; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return Reduction< Device >::reduce( 0, v_view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, v_view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp index 
5db872f5e0aaa2f289c81cd88b8aaeedd791e75f..90c6f724a7106f18f9ea87f0eb9807c2d264c349 100644 --- a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp @@ -30,7 +30,7 @@ double sum( const Vector< double, Device >& v ) * lambdas defined above and finally value of idempotent element, zero in this case, which serve for the * reduction initiation. */ - return Reduction< Device >::reduce( 0, view.getSize(), reduce, fetch, 0.0 ); + return Reduction< Device >::reduce( 0, view.getSize(), fetch, reduce, 0.0 ); } int main( int argc, char* argv[] ) diff --git a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp index fa2717ac324aacb17038a261b4b1de2af8fbe05e..8bd08e900dcf9dfb0924e3665ac0211037fa135f 100644 --- a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp @@ -17,7 +17,7 @@ double updateAndResidue( Vector< double, Device >& u, const Vector< double, Devi u_view[ i ] += tau * add; return add * add; }; auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; }; - return sqrt( Reduction< Device >::reduce( 0, u_view.getSize(), reduce, fetch, 0.0 ) ); + return sqrt( Reduction< Device >::reduce( 0, u_view.getSize(), fetch, reduce, 0.0 ) ); } int main( int argc, char* argv[] ) diff --git a/src/Benchmarks/BLAS/CommonVectorOperations.hpp b/src/Benchmarks/BLAS/CommonVectorOperations.hpp index acb96fabbe4201cb07bbb39218d86a593df238a0..d6a459677deec7e2a78cf3bbf2e12a1e8c46ecd9 100644 --- a/src/Benchmarks/BLAS/CommonVectorOperations.hpp +++ b/src/Benchmarks/BLAS/CommonVectorOperations.hpp @@ -30,7 +30,7 @@ getVectorMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, 
const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -47,7 +47,7 @@ getVectorMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -64,7 +64,7 @@ getVectorAbsMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -81,7 +81,7 @@ getVectorAbsMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), reduction, fetch, 
std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -97,7 +97,7 @@ getVectorL1Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -113,7 +113,7 @@ getVectorL2Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; - return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ) ); } template< typename Device > @@ -136,7 +136,7 @@ getVectorLpNorm( const Vector& v, const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; - return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -155,7 +155,7 @@ getVectorSum( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< 
DeviceType >::reduce( ( IndexType ) 0, v.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -175,7 +175,7 @@ getVectorDifferenceMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -195,7 +195,7 @@ getVectorDifferenceMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -215,7 +215,7 @@ getVectorDifferenceAbsMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< 
typename Device > @@ -235,7 +235,7 @@ getVectorDifferenceAbsMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -254,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -276,7 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1, auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; - return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ) ); } template< typename Device > @@ -302,7 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; - return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 
/ p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -321,7 +321,7 @@ getVectorDifferenceSum( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } template< typename Device > @@ -340,7 +340,7 @@ getScalarProduct( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, v1.getSize(), fetch, std::plus<>{}, ( ResultType ) 0 ); } } // namespace Benchmarks diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp index 53b60bb3925fe405de95ec71f3dc756f5aecdbf8..5351b69625e22e73b5039b39321a6338c5944f2f 100644 --- a/src/TNL/Algorithms/MemoryOperationsCuda.hpp +++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp @@ -148,7 +148,7 @@ compare( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." 
); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; - return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } template< typename Element, @@ -164,7 +164,7 @@ containsValue( const Element* data, TNL_ASSERT_GE( size, (Index) 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, std::logical_or<>{}, fetch, false ); + return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, fetch, std::logical_or<>{}, false ); } template< typename Element, @@ -180,7 +180,7 @@ containsOnlyValue( const Element* data, TNL_ASSERT_GE( size, 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Cuda >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } } // namespace Algorithms diff --git a/src/TNL/Algorithms/MemoryOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp index 090d0bb9edc8e8d0e1d6a91a2c0ac40abcd1d3d5..92b44f8cf51fe085eda52d41f98524125631b6d4 100644 --- a/src/TNL/Algorithms/MemoryOperationsHost.hpp +++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp @@ -113,7 +113,7 @@ compare( const DestinationElement* destination, if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto fetch = [destination, source] ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; - return Reduction< Devices::Host >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Host >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } else { // sequential algorithm can return as soon as it finds a mismatch @@ -135,7 +135,7 @@ 
containsValue( const Element* data, if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto fetch = [=] ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Host >::reduce( ( Index ) 0, size, std::logical_or<>{}, fetch, false ); + return Reduction< Devices::Host >::reduce( ( Index ) 0, size, fetch, std::logical_or<>{}, false ); } else { // sequential algorithm can return as soon as it finds a match @@ -157,7 +157,7 @@ containsOnlyValue( const Element* data, if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto fetch = [data, value] ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Host >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true ); + return Reduction< Devices::Host >::reduce( ( Index ) 0, size, fetch, std::logical_and<>{}, true ); } else { // sequential algorithm can return as soon as it finds a mismatch diff --git a/src/TNL/Algorithms/Reduction.h b/src/TNL/Algorithms/Reduction.h index e36a706c13d106f8ec9f47cc3e725567e596cd82..d928ec6875e6a39bb855ae29961ef89d6b358b89 100644 --- a/src/TNL/Algorithms/Reduction.h +++ b/src/TNL/Algorithms/Reduction.h @@ -45,27 +45,27 @@ struct Reduction< Devices::Sequential > * * \tparam Index is a type for indexing. * \tparam Result is a type of the reduction result. - * \tparam ReductionOperation is a lambda function performing the reduction. - * \tparam DataFetcher is a lambda function for fetching the input data. + * \tparam Fetch is a lambda function for fetching the input data. + * \tparam Reduce is a lambda function performing the reduction. * * \param begin defines range [begin, end) of indexes which will be used for the reduction. * \param end defines range [begin, end) of indexes which will be used for the reduction. - * \param reduction is a lambda function defining the reduction operation. - * \param dataFetcher is a lambda function fetching the input data. 
+ * \param fetch is a lambda function fetching the input data. + * \param reduce is a lambda function defining the reduction operation. * \param zero is the idempotent element for the reduction operation, i.e. element which * does not change the result of the reduction. * \return result of the reduction * - * The dataFetcher lambda function takes one argument which is index of the element to be fetched: + * The `fetch` lambda function takes one argument which is index of the element to be fetched: * * ``` - * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... }; + * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... }; * ``` * - * The reduction lambda function takes two variables which are supposed to be reduced: + * The `reduce` lambda function takes two variables which are supposed to be reduced: * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; * ``` * * \par Example @@ -78,13 +78,13 @@ struct Reduction< Devices::Sequential > */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static constexpr Result reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); /** @@ -96,28 +96,28 @@ struct Reduction< Devices::Sequential > * * \tparam Index is a type for indexing. * \tparam Result is a type of the reduction result. - * \tparam ReductionOperation is a lambda function performing the reduction. - * \tparam DataFetcher is a lambda function for fetching the input data. + * \tparam Fetch is a lambda function for fetching the input data. + * \tparam Reduce is a lambda function performing the reduction. * * \param begin defines range [begin, end) of indexes which will be used for the reduction. 
* \param end defines range [begin, end) of indexes which will be used for the reduction. - * \param reduction is a lambda function defining the reduction operation and managing the elements positions. - * \param dataFetcher is a lambda function fetching the input data. + * \param fetch is a lambda function fetching the input data. + * \param reduce is a lambda function defining the reduction operation and managing the elements positions. * \param zero is the idempotent element for the reduction operation, i.e. element which * does not change the result of the reduction. - * \return result of the reduction in a form of std::pair< Index, Result> structure. `pair.first' - * is the element position and `pair.second` is the reduction result. + * \return result of the reduction in a form of std::pair< Result, Index > structure. `pair.first` + * is the reduction result and `pair.second` is the element position. * - * The dataFetcher lambda function takes one argument which is index of the element to be fetched: + * The `fetch` lambda function takes one argument which is index of the element to be fetched: * * ``` - * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... }; + * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... }; * ``` * - * The reduction lambda function takes two variables which are supposed to be reduced: + * The `reduce` lambda function takes two variables which are supposed to be reduced: * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ...
}; * ``` * * \par Example @@ -130,13 +130,13 @@ struct Reduction< Devices::Sequential > */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static constexpr std::pair< Result, Index > reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); }; @@ -148,27 +148,27 @@ struct Reduction< Devices::Host > * * \tparam Index is a type for indexing. * \tparam Result is a type of the reduction result. - * \tparam ReductionOperation is a lambda function performing the reduction. - * \tparam DataFetcher is a lambda function for fetching the input data. + * \tparam Fetch is a lambda function for fetching the input data. + * \tparam Reduce is a lambda function performing the reduction. * * \param begin defines range [begin, end) of indexes which will be used for the reduction. * \param end defines range [begin, end) of indexes which will be used for the reduction. - * \param reduction is a lambda function defining the reduction operation. - * \param dataFetcher is a lambda function fetching the input data. + * \param fetch is a lambda function fetching the input data. + * \param reduce is a lambda function defining the reduction operation. * \param zero is the idempotent element for the reduction operation, i.e. element which * does not change the result of the reduction. * \return result of the reduction * - * The dataFetcher lambda function takes one argument which is index of the element to be fetched: + * The `fetch` lambda function takes one argument which is index of the element to be fetched: * * ``` - * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... }; + * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... 
}; * ``` * - * The reduction lambda function takes two variables which are supposed to be reduced: + * The `reduce` lambda function takes two variables which are supposed to be reduced: * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; * ``` * * \par Example @@ -181,65 +181,65 @@ struct Reduction< Devices::Host > */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static Result reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); /** * \brief Computes reduction on CPU and returns position of an element of interest. - * - * For example in case of computing minimal or maximal element in array/vector, + * + * For example in case of computing minimal or maximal element in array/vector, * the position of the element having given value can be obtained. The use of this method * is, however, more flexible. - * + * * \tparam Index is a type for indexing. * \tparam Result is a type of the reduction result. - * \tparam ReductionOperation is a lambda function performing the reduction. - * \tparam DataFetcher is a lambda function for fetching the input data. + * \tparam Fetch is a lambda function for fetching the input data. + * \tparam Reduce is a lambda function performing the reduction. - * + * * \param begin defines range [begin, end) of indexes which will be used for the reduction. * \param end defines range [begin, end) of indexes which will be used for the reduction. - * \param reduction is a lambda function defining the reduction operation and managing the elements positions. - * \param dataFetcher is a lambda function fetching the input data. + * \param fetch is a lambda function fetching the input data. + * \param reduce is a lambda function defining the reduction operation and managing the elements positions.
* \param zero is the idempotent element for the reduction operation, i.e. element which * does not change the result of the reduction. - * \return result of the reduction in a form of std::pair< Index, Result> structure. `pair.first' - * is the element position and `pair.second` is the reduction result. + * \return result of the reduction in a form of std::pair< Result, Index > structure. `pair.first` + * is the reduction result and `pair.second` is the element position. - * - * The dataFetcher lambda function takes one argument which is index of the element to be fetched: - * + * + * The `fetch` lambda function takes one argument which is index of the element to be fetched: + * * ``` - * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... }; + * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... }; * ``` - * - * The reduction lambda function takes two variables which are supposed to be reduced: - * + * + * The `reduce` lambda function takes two variables which are supposed to be reduced: + * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; * ``` - * + * * \par Example - * + * * \include ReductionAndScan/ReductionWithArgument.cpp - * + * * \par Output - * + * * \include ReductionWithArgument.out */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static std::pair< Result, Index > reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); }; @@ -251,27 +251,27 @@ struct Reduction< Devices::Cuda > * * \tparam Index is a type for indexing. * \tparam Result is a type of the reduction result. - * \tparam ReductionOperation is a lambda function performing the reduction. - * \tparam DataFetcher is a lambda function for fetching the input data. + * \tparam Fetch is a lambda function for fetching the input data.
+ * \tparam Reduce is a lambda function performing the reduction. * * \param begin defines range [begin, end) of indexes which will be used for the reduction. * \param end defines range [begin, end) of indexes which will be used for the reduction. - * \param reduction is a lambda function defining the reduction operation. - * \param dataFetcher is a lambda function fetching the input data. + * \param fetch is a lambda function fetching the input data. + * \param reduce is a lambda function defining the reduction operation. * \param zero is the idempotent element for the reduction operation, i.e. element which * does not change the result of the reduction. * \return result of the reduction * - * The dataFetcher lambda function takes one argument which is index of the element to be fetched: + * The `fetch` lambda function takes one argument which is index of the element to be fetched: * * ``` - * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... }; + * auto fetch = [=] __cuda_callable__ ( Index i ) { return ... }; * ``` * - * The reduction lambda function takes two variables which are supposed to be reduced: + * The `reduce` lambda function takes two variables which are supposed to be reduced: * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; * ``` * * \par Example @@ -284,46 +284,46 @@ struct Reduction< Devices::Cuda > */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static Result reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); /** * \brief Computes reduction on GPU and returns position of an element of interest. 
* - * For example in case of computing minimal or maximal element in array/vector, + * For example in case of computing minimal or maximal element in array/vector, * the position of the element having given value can be obtained. The use of this method * is, however, more flexible. * * \tparam Index is a type for indexing. * \tparam Result is a type of the reduction result. - * \tparam ReductionOperation is a lambda function performing the reduction. - * \tparam DataFetcher is a lambda function for fetching the input data. + * \tparam Fetch is a lambda function for fetching the input data. + * \tparam Reduce is a lambda function performing the reduction. * * \param begin defines range [begin, end) of indexes which will be used for the reduction. * \param end defines range [begin, end) of indexes which will be used for the reduction. - * \param reduction is a lambda function defining the reduction operation and managing the elements positions. - * \param dataFetcher is a lambda function fetching the input data. + * \param fetch is a lambda function fetching the input data. + * \param reduce is a lambda function defining the reduction operation and managing the elements positions. * \param zero is the idempotent element for the reduction operation, i.e. element which * does not change the result of the reduction. - * \return result of the reduction in a form of std::pair< Index, Result> structure. `pair.first' - * is the element position and `pair.second` is the reduction result. + * \return result of the reduction in a form of std::pair< Result, Index > structure. `pair.first` + * is the reduction result and `pair.second` is the element position. * - * The dataFetcher lambda function takes one argument which is index of the element to be fetched: + * The `fetch` lambda function takes one argument which is index of the element to be fetched: * * ``` - * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... }; + * auto fetch = [=] __cuda_callable__ ( Index i ) { return ...
}; * ``` * - * The reduction lambda function takes two variables which are supposed to be reduced: + * The `reduce` lambda function takes two variables which are supposed to be reduced: * * ``` - * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; + * auto reduce = [] __cuda_callable__ ( const Result& a, const Result& b, Index& aIdx, const Index& bIdx ) { return ... }; * ``` * * \par Example @@ -336,13 +336,13 @@ struct Reduction< Devices::Cuda > */ template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > static std::pair< Result, Index > reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ); }; diff --git a/src/TNL/Algorithms/Reduction.hpp b/src/TNL/Algorithms/Reduction.hpp index 70e725af6b0bc87acd822274d3a424c36957425a..7873f9c3c4268fbbec0cd7757bcfca0dade40869 100644 --- a/src/TNL/Algorithms/Reduction.hpp +++ b/src/TNL/Algorithms/Reduction.hpp @@ -37,14 +37,14 @@ static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//25 template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > constexpr Result Reduction< Devices::Sequential >:: reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { constexpr int block_size = 128; @@ -55,45 +55,45 @@ reduce( const Index begin, // initialize array for unrolled results Result r[ 4 ] = { zero, zero, zero, zero }; - // main reduction (explicitly unrolled loop) + // main reduce (explicitly unrolled loop) for( Index b = 0; b < blocks; b++ ) { const Index offset = begin + b * block_size; for( int i = 0; i < block_size; i += 4 ) { - r[ 0 ] = reduction( 
r[ 0 ], dataFetcher( offset + i ) ); - r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); - r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); - r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( offset + i ) ); + r[ 1 ] = reduce( r[ 1 ], fetch( offset + i + 1 ) ); + r[ 2 ] = reduce( r[ 2 ], fetch( offset + i + 2 ) ); + r[ 3 ] = reduce( r[ 3 ], fetch( offset + i + 3 ) ); } } - // reduction of the last, incomplete block (not unrolled) + // reduction of the last, incomplete block (not unrolled) for( Index i = begin + blocks * block_size; i < end; i++ ) - r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( i ) ); - // reduction of unrolled results - r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); - r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); - r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); + // reduction of unrolled results + r[ 0 ] = reduce( r[ 0 ], r[ 2 ] ); + r[ 1 ] = reduce( r[ 1 ], r[ 3 ] ); + r[ 0 ] = reduce( r[ 0 ], r[ 1 ] ); return r[ 0 ]; } else { Result result = zero; for( Index i = begin; i < end; i++ ) - result = reduction( result, dataFetcher( i ) ); + result = reduce( result, fetch( i ) ); return result; } } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > constexpr std::pair< Result, Index > Reduction< Devices::Sequential >:: reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { constexpr int block_size = 128; @@ -106,7 +106,7 @@ reduceWithArgument( const Index begin, Result r[ 4 ] = { zero, zero, zero, zero }; bool initialized( false ); - // main reduction (explicitly unrolled loop) + // main reduction (explicitly unrolled loop) for( Index b = 0; b < blocks; b++ ) { const Index offset = begin + b * block_size; for( int i = 0; i < block_size; i += 4 ) { @@ -116,48 +116,48 @@
reduceWithArgument( const Index begin, arg[ 1 ] = offset + i + 1; arg[ 2 ] = offset + i + 2; arg[ 3 ] = offset + i + 3; - r[ 0 ] = dataFetcher( offset + i ); - r[ 1 ] = dataFetcher( offset + i + 1 ); - r[ 2 ] = dataFetcher( offset + i + 2 ); - r[ 3 ] = dataFetcher( offset + i + 3 ); + r[ 0 ] = fetch( offset + i ); + r[ 1 ] = fetch( offset + i + 1 ); + r[ 2 ] = fetch( offset + i + 2 ); + r[ 3 ] = fetch( offset + i + 3 ); initialized = true; continue; } - reduction( r[ 0 ], dataFetcher( offset + i ), arg[ 0 ], offset + i ); - reduction( r[ 1 ], dataFetcher( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); - reduction( r[ 2 ], dataFetcher( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); - reduction( r[ 3 ], dataFetcher( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); + reduce( r[ 0 ], fetch( offset + i ), arg[ 0 ], offset + i ); + reduce( r[ 1 ], fetch( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); + reduce( r[ 2 ], fetch( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); + reduce( r[ 3 ], fetch( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); } } - // reduction of the last, incomplete block (not unrolled) + // reduction of the last, incomplete block (not unrolled) for( Index i = begin + blocks * block_size; i < size; i++ ) - reduction( r[ 0 ], dataFetcher( i ), arg[ 0 ], i ); + reduce( r[ 0 ], fetch( i ), arg[ 0 ], i ); - // reduction of unrolled results - reduction( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); - reduction( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); - reduction( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); + // reduction of unrolled results + reduce( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); + reduce( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); + reduce( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); return std::make_pair( r[ 0 ], arg[ 0 ] ); } else { - std::pair< Result, Index > result( dataFetcher( begin ), begin ); + std::pair< Result, Index > result( fetch( begin ), begin ); for( Index i = begin + 1; i < end; i++ ) - reduction( result.first, dataFetcher( i ), result.second, i ); + reduce( result.first,
fetch( i ), result.second, i ); return result; } } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > Result Reduction< Devices::Host >:: reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { #ifdef HAVE_OPENMP @@ -178,10 +178,10 @@ reduce( const Index begin, for( Index b = 0; b < blocks; b++ ) { const Index offset = begin + b * block_size; for( int i = 0; i < block_size; i += 4 ) { - r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); - r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); - r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); - r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( offset + i ) ); + r[ 1 ] = reduce( r[ 1 ], fetch( offset + i + 1 ) ); + r[ 2 ] = reduce( r[ 2 ], fetch( offset + i + 2 ) ); + r[ 3 ] = reduce( r[ 3 ], fetch( offset + i + 3 ) ); } } @@ -189,37 +189,37 @@ reduce( const Index begin, #pragma omp single nowait { for( Index i = begin + blocks * block_size; i < end; i++ ) - r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); + r[ 0 ] = reduce( r[ 0 ], fetch( i ) ); } - // local reduction of unrolled results - r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); - r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); - r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); + // local reduction of unrolled results + r[ 0 ] = reduce( r[ 0 ], r[ 2 ] ); + r[ 1 ] = reduce( r[ 1 ], r[ 3 ] ); + r[ 0 ] = reduce( r[ 0 ], r[ 1 ] ); - // inter-thread reduction of local results + // inter-thread reduction of local results #pragma omp critical { - result = reduction( result, r[ 0 ] ); + result = reduce( result, r[ 0 ] ); } } return result; } else #endif - return Reduction< Devices::Sequential >::reduce( begin, end, reduction, dataFetcher, zero ); + return Reduction< Devices::Sequential >::reduce( begin, end, fetch, reduce, zero ); }
template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > std::pair< Result, Index > Reduction< Devices::Host >:: reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { #ifdef HAVE_OPENMP @@ -247,17 +247,17 @@ reduceWithArgument( const Index begin, arg[ 1 ] = offset + i + 1; arg[ 2 ] = offset + i + 2; arg[ 3 ] = offset + i + 3; - r[ 0 ] = dataFetcher( offset + i ); - r[ 1 ] = dataFetcher( offset + i + 1 ); - r[ 2 ] = dataFetcher( offset + i + 2 ); - r[ 3 ] = dataFetcher( offset + i + 3 ); + r[ 0 ] = fetch( offset + i ); + r[ 1 ] = fetch( offset + i + 1 ); + r[ 2 ] = fetch( offset + i + 2 ); + r[ 3 ] = fetch( offset + i + 3 ); initialized = true; continue; } - reduction( r[ 0 ], dataFetcher( offset + i ), arg[ 0 ], offset + i ); - reduction( r[ 1 ], dataFetcher( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); - reduction( r[ 2 ], dataFetcher( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); - reduction( r[ 3 ], dataFetcher( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); + reduce( r[ 0 ], fetch( offset + i ), arg[ 0 ], offset + i ); + reduce( r[ 1 ], fetch( offset + i + 1 ), arg[ 1 ], offset + i + 1 ); + reduce( r[ 2 ], fetch( offset + i + 2 ), arg[ 2 ], offset + i + 2 ); + reduce( r[ 3 ], fetch( offset + i + 3 ), arg[ 3 ], offset + i + 3 ); } } @@ -265,44 +265,44 @@ reduceWithArgument( const Index begin, #pragma omp single nowait { for( Index i = begin + blocks * block_size; i < end; i++ ) - reduction( r[ 0 ], dataFetcher( i ), arg[ 0 ], i ); + reduce( r[ 0 ], fetch( i ), arg[ 0 ], i ); } - // local reduction of unrolled results - reduction( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); - reduction( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); - reduction( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); + // local reduction of unrolled results + reduce( r[ 0 ], r[ 2 ], arg[ 0 ], arg[ 2 ] ); +
reduce( r[ 1 ], r[ 3 ], arg[ 1 ], arg[ 3 ] ); + reduce( r[ 0 ], r[ 1 ], arg[ 0 ], arg[ 1 ] ); - // inter-thread reduction of local results + // inter-thread reduction of local results #pragma omp critical { if( result.second == -1 ) result.second = arg[ 0 ]; - reduction( result.first, r[ 0 ], result.second, arg[ 0 ] ); + reduce( result.first, r[ 0 ], result.second, arg[ 0 ] ); } } return result; } else #endif - return Reduction< Devices::Sequential >::reduceWithArgument( begin, end, reduction, dataFetcher, zero ); + return Reduction< Devices::Sequential >::reduceWithArgument( begin, end, fetch, reduce, zero ); } template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > Result Reduction< Devices::Cuda >:: reduce( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { // Only fundamental and pointer types can be safely reduced on host. Complex // objects stored on the device might contain pointers into the device memory, - // in which case reduction on host might fail. + // in which case reduction on host might fail.
constexpr bool can_reduce_later_on_host = std::is_fundamental< Result >::value || std::is_pointer< Result >::value; #ifdef CUDA_REDUCTION_PROFILING @@ -313,11 +313,11 @@ reduce( const Index begin, CudaReductionKernelLauncher< Index, Result > reductionLauncher( begin, end ); - // start the reduction on the GPU + // start the reduction on the GPU Result* deviceAux1( 0 ); const int reducedSize = reductionLauncher.start( - reduction, - dataFetcher, + reduce, + fetch, zero, deviceAux1 ); @@ -353,9 +353,9 @@ reduce( const Index begin, timer.start(); #endif - // finish the reduction on the host + // finish the reduction on the host auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; - const Result result = Reduction< Devices::Sequential >::reduce( 0, reducedSize, reduction, fetch, zero ); + const Result result = Reduction< Devices::Sequential >::reduce( 0, reducedSize, fetch, reduce, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -364,8 +364,8 @@ reduce( const Index begin, return result; } else { - // data can't be safely reduced on host, so continue with the reduction on the GPU - auto result = reductionLauncher.finish( reduction, zero ); + // data can't be safely reduced on host, so continue with the reduction on the GPU + auto result = reductionLauncher.finish( reduce, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -380,19 +380,19 @@ reduce( const Index begin, template< typename Index, typename Result, - typename ReductionOperation, - typename DataFetcher > + typename Fetch, + typename Reduce > std::pair< Result, Index > Reduction< Devices::Cuda >:: reduceWithArgument( const Index begin, const Index end, - const ReductionOperation& reduction, - DataFetcher& dataFetcher, + Fetch&& fetch, + Reduce&& reduce, const Result& zero ) { // Only fundamental and pointer types can be safely reduced on host. Complex // objects stored on the device might contain pointers into the device memory, - // in which case reduction on host might fail.
+ // in which case reduction on host might fail. constexpr bool can_reduce_later_on_host = std::is_fundamental< Result >::value || std::is_pointer< Result >::value; #ifdef CUDA_REDUCTION_PROFILING @@ -403,12 +403,12 @@ reduceWithArgument( const Index begin, CudaReductionKernelLauncher< Index, Result > reductionLauncher( begin, end ); - // start the reduction on the GPU + // start the reduction on the GPU Result* deviceAux1( nullptr ); Index* deviceIndexes( nullptr ); const int reducedSize = reductionLauncher.startWithArgument( - reduction, - dataFetcher, + reduce, + fetch, zero, deviceAux1, deviceIndexes ); @@ -460,11 +460,11 @@ reduceWithArgument( const Index begin, timer.start(); #endif - // finish the reduction on the host + // finish the reduction on the host // auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; -// const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero ); +// const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, reduce, fetch, zero ); for( Index i = 1; i < reducedSize; i++ ) - reduction( resultArray[ 0 ], resultArray[ i ], indexArray[ 0 ], indexArray[ i ] ); + reduce( resultArray[ 0 ], resultArray[ i ], indexArray[ 0 ], indexArray[ i ] ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -473,8 +473,8 @@ reduceWithArgument( const Index begin, return std::make_pair( resultArray[ 0 ], indexArray[ 0 ] ); } else { - // data can't be safely reduced on host, so continue with the reduction on the GPU - auto result = reductionLauncher.finishWithArgument( reduction, zero ); + // data can't be safely reduced on host, so continue with the reduction on the GPU + auto result = reductionLauncher.finishWithArgument( reduce, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 2c0d3d631f454810b6e0144135e4904a3a387914..9143dea1accd0b89a74901e415675f6e08ae4b2c 100644
--- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -380,7 +380,7 @@ reduceElements( Index begin, Index end, Fetch&& fetch, Reduce&& reduce, const Re ValueType* d = this->getData(); auto main_fetch = [=] __cuda_callable__ ( IndexType i ) mutable -> Result { return fetch( i, d[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( begin, end, reduce, main_fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( begin, end, main_fetch, reduce, zero ); } template< typename Value, @@ -397,7 +397,7 @@ reduceElements( Index begin, Index end, Fetch&& fetch, Reduce&& reduce, const Re const ValueType* d = this->getData(); auto main_fetch = [=] __cuda_callable__ ( IndexType i ) mutable -> Result { return fetch( i, d[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( begin, end, reduce, main_fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( begin, end, main_fetch, reduce, zero ); } template< typename Value, diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h index 33986e1edd494fe09c9491b648cf4d52aa8bd12a..738409cc40d94959599d3ad8f8f3c83bb6277bac 100644 --- a/src/TNL/Containers/Expressions/Comparison.h +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -68,7 +68,7 @@ struct VectorComparison< T1, T2, false > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] == view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } }; @@ -100,7 +100,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i 
) -> bool { return view_a[ i ] > view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool GE( const T1& a, const T2& b ) @@ -115,7 +115,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] >= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LT( const T1& a, const T2& b ) @@ -130,7 +130,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] < view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LE( const T1& a, const T2& b ) @@ -145,7 +145,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, VectorExpressionVariable > const auto view_a = a.getConstView(); const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] <= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } }; @@ -162,7 +162,7 
@@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a == view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool NE( const T1& a, const T2& b ) @@ -177,7 +177,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a > view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool GE( const T1& a, const T2& b ) @@ -187,7 +187,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a >= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool LT( const T1& a, const T2& b ) @@ -197,7 +197,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a < view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } static bool LE( const T1& a, 
const T2& b ) @@ -207,7 +207,7 @@ struct Comparison< T1, T2, ArithmeticVariable, VectorExpressionVariable > const auto view_b = b.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return a <= view_b[ i ]; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, b.getSize(), fetch, std::logical_and<>{}, true ); } }; @@ -224,7 +224,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] == b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool NE( const T1& a, const T2& b ) @@ -239,7 +239,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] > b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool GE( const T1& a, const T2& b ) @@ -249,7 +249,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] >= b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LT( const T1& a, const 
T2& b ) @@ -259,7 +259,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] < b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } static bool LE( const T1& a, const T2& b ) @@ -269,7 +269,7 @@ struct Comparison< T1, T2, VectorExpressionVariable, ArithmeticVariable > const auto view_a = a.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return view_a[ i ] <= b; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, a.getSize(), fetch, std::logical_and<>{}, true ); } }; diff --git a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h index 5f67084fd8f3e21dd84ff165625cc1186386dd9b..6959a95fed7ececd17b330f0c720b2d1d4dc0904 100644 --- a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h @@ -1073,7 +1073,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1092,7 +1092,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; 
- return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } //// @@ -1118,7 +1118,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1141,7 +1141,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } //// @@ -1167,7 +1167,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1190,7 +1190,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), fetch, reduction, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 7baf37572ef8098fcefe260e711169560dae0cd2..93d7e802d3cb627227156e5026a1404f7b57da7c 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -896,7 +896,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return 
Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -915,7 +915,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } //// @@ -941,7 +941,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -964,7 +964,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } //// @@ -990,7 +990,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), fetch, reduction, zero ); } template< typename Vector, @@ -1013,7 +1013,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, 
lhs.getSize(), fetch, reduction, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 8de97f06cb09bae3625d62c8e7fba104373f4e32..6e5f5624b22934f4caeb22e303ab51ad98c27072 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -43,7 +43,7 @@ auto ExpressionMin( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -65,7 +65,7 @@ auto ExpressionArgMin( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -85,7 +85,7 @@ auto ExpressionMax( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType 
>::lowest() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Expression > @@ -107,7 +107,7 @@ auto ExpressionArgMax( const Expression& expression ) }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), fetch, reduction, std::numeric_limits< ResultType >::lowest() ); } template< typename Expression > @@ -119,7 +119,7 @@ auto ExpressionSum( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::plus<>{}, fetch, (ResultType) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::plus<>{}, (ResultType) 0 ); } template< typename Expression > @@ -131,7 +131,7 @@ auto ExpressionProduct( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::multiplies<>{}, fetch, (ResultType) 1 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::multiplies<>{}, (ResultType) 1 ); } template< typename Expression > @@ -145,7 +145,7 @@ auto ExpressionLogicalAnd( const 
Expression& expression ) auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::logical_and<>{}, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::logical_and<>{}, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -157,7 +157,7 @@ auto ExpressionLogicalOr( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::logical_or<>{}, fetch, (ResultType) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::logical_or<>{}, (ResultType) 0 ); } template< typename Expression > @@ -171,7 +171,7 @@ auto ExpressionBinaryAnd( const Expression& expression ) auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::bit_and<>{}, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::bit_and<>{}, std::numeric_limits< ResultType >::max() ); } template< typename Expression > @@ -183,7 +183,7 @@ auto ExpressionBinaryOr( const Expression& expression ) const auto view = 
expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::bit_or<>{}, fetch, (ResultType) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), fetch, std::bit_or<>{}, (ResultType) 0 ); } } // namespace Expressions diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index f2532a47bb17a87f3dc009dc55dfbe2f94ce3738..97e82af0e31937860a94466ff95d0837936c3b83 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -156,7 +156,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 512287935077ed6f41b816c5eb983feb015daf74..57c79cd769704d6ddf576167cafe9d1b9b56eb26 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -85,7 +85,7 @@ Index Matrix< Real, Device, Index, RealAllocator >::getNonzeroElementsCount() co auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 
8c20d07d1a18880e34db66c12aeb21f4493e01ee..83563a82570a4f4b98e12b125c4d447f1492b982 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -63,7 +63,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 844e1721f1b689d9b51b20eb716112083a774eb2..44c43da7f9640f4f23d5b47e2c37f4f36e60b42a 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -173,7 +173,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index c26b3ee05fd64193087724e4c7188336e338170b..e7842a50a5065fb4ccf322fe92ba533497e326f5 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -191,7 +191,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( columns_view[ i ] != paddingIndex ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, 
this->columnIndexes.getSize(), fetch, std::plus<>{}, 0 ); } else { @@ -799,7 +799,7 @@ operator==( const Matrix& m ) const { return view1.getRow( i ) == view2.getRow( i ); }; - return Algorithms::Reduction< DeviceType >::reduce( 0, this->getRows(), std::logical_and<>{}, fetch, true ); + return Algorithms::Reduction< DeviceType >::reduce( 0, this->getRows(), fetch, std::logical_and<>{}, true ); } template< typename Real, diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index c0b6547fbacc19d9c1117984ff8d35fba6731601..c125ffe222d690e5153d51e82e995bdf48372ea8 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -133,7 +133,7 @@ getNonzeroElementsCount() const auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), std::plus<>{}, fetch, 0 ); + return Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, this->values.getSize(), fetch, std::plus<>{}, 0 ); } template< typename Real, diff --git a/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h b/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h index dab80fc7e3916d89bda1ab57581b6a9263d3891d..dc0c767b806000f2497f2cae0e32b1d67aa0e5cf 100644 --- a/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h +++ b/src/TNL/Meshes/MeshDetails/layers/EntityTags/Layer.h @@ -139,8 +139,8 @@ public: { return bool(tags_view[ entityIndex ] & EntityTags::GhostEntity); }; - const GlobalIndexType boundaryEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), std::plus<>{}, is_boundary, (GlobalIndexType) 0 ); - const GlobalIndexType ghostEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), std::plus<>{}, is_ghost, (GlobalIndexType) 0 ); + const GlobalIndexType boundaryEntities = Algorithms::Reduction< Device 
>::reduce( (GlobalIndexType) 0, tags.getSize(), is_boundary, std::plus<>{}, (GlobalIndexType) 0 ); + const GlobalIndexType ghostEntities = Algorithms::Reduction< Device >::reduce( (GlobalIndexType) 0, tags.getSize(), is_ghost, std::plus<>{}, (GlobalIndexType) 0 ); interiorIndices.setSize( tags.getSize() - boundaryEntities ); boundaryIndices.setSize( boundaryEntities ); diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 3ac1f38ff16d19c10150ca02675cf3a76c5c13cd..9cd7c3db05b7cdcda7fb2628102db3e6b2cd6f4c 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -448,7 +448,7 @@ void test_SetElement() auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return ( v_view[ i ] == m_view.getElement( i, i ) ); }; - EXPECT_TRUE( TNL::Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, m.getRows(), std::logical_and<>{}, fetch, true ) ); + EXPECT_TRUE( TNL::Algorithms::Reduction< DeviceType >::reduce( ( IndexType ) 0, m.getRows(), fetch, std::logical_and<>{}, true ) ); }