Loading src/Benchmarks/BLAS/CommonVectorOperations.hpp +9 −18 Original line number Diff line number Diff line Loading @@ -97,8 +97,7 @@ getVectorL1Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading @@ -114,8 +113,7 @@ getVectorL2Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ) ); return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > Loading @@ -138,8 +136,7 @@ getVectorLpNorm( const Vector& v, const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ), 1.0 / p ); return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > Loading @@ -158,8 +155,7 @@ getVectorSum( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading Loading @@ -258,8 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading @@ -281,8 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1, auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ) ); return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > Loading @@ -308,8 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ), 1.0 / p ); return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > Loading @@ -328,8 +321,7 @@ getVectorDifferenceSum( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading @@ -348,8 +340,7 @@ getScalarProduct( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } } // namespace Benchmarks Loading src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +3 −6 Original line number Diff line number Diff line Loading @@ -133,8 +133,7 @@ compare( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; auto reduction = [] __cuda_callable__ ( bool a, bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true ); return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); } template< typename Element, Loading @@ -150,8 +149,7 @@ containsValue( const Element* data, TNL_ASSERT_GE( size, (Index) 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; auto reduction = [] __cuda_callable__ ( bool a, bool b ) { return a || b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, false ); return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false ); } template< typename Element, Loading @@ -167,8 +165,7 @@ containsOnlyValue( const Element* data, TNL_ASSERT_GE( size, 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; auto reduction = [] __cuda_callable__ ( bool a, bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true ); return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); } Loading src/TNL/Containers/Algorithms/Multireduction.h +6 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,8 @@ #pragma once #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading @@ -32,6 +34,8 @@ struct Multireduction< Devices::Host > * the i-th value to be reduced from the j-th dataset * (i = 0,...,size-1; j = 0,...,n-1) * reduction: callable object representing the reduction operation * for example, it can be an instance of std::plus, std::logical_and, * std::logical_or etc. * size: the size of each dataset * n: number of datasets to be reduced * result: output array of size = n Loading Loading @@ -59,6 +63,8 @@ struct Multireduction< Devices::Cuda > * the i-th value to be reduced from the j-th dataset * (i = 0,...,size-1; j = 0,...,n-1) * reduction: callable object representing the reduction operation * for example, it can be an instance of std::plus, std::logical_and, * std::logical_or etc. * size: the size of each dataset * n: number of datasets to be reduced * hostResult: output array of size = n Loading src/TNL/Containers/Algorithms/Reduction.h +5 −4 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ #pragma once #include <utility> // std::pair #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading @@ -33,7 +34,7 @@ struct Reduction< Devices::Host > typename DataFetcher > static Result reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); Loading @@ -43,7 +44,7 @@ struct Reduction< Devices::Host > typename DataFetcher > static std::pair< Index, Result > reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); }; Loading @@ -57,7 +58,7 @@ struct Reduction< Devices::Cuda > typename DataFetcher > static Result reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); Loading @@ -67,7 +68,7 @@ struct Reduction< Devices::Cuda > typename DataFetcher > static std::pair< Index, Result > reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); }; Loading src/TNL/Containers/Algorithms/Reduction.hpp +4 −4 Original line number Diff line number Diff line Loading @@ -45,7 +45,7 @@ template< typename Index, Result Reduction< Devices::Host >:: reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading @@ -137,7 +137,7 @@ template< typename Index, std::pair< Index, Result > Reduction< Devices::Host >:: reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading @@ -260,7 +260,7 @@ template< typename Index, Result Reduction< Devices::Cuda >:: reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading @@ -336,7 +336,7 @@ template< typename Index, std::pair< Index, Result > Reduction< Devices::Cuda >:: reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading
src/Benchmarks/BLAS/CommonVectorOperations.hpp +9 −18 Original line number Diff line number Diff line Loading @@ -97,8 +97,7 @@ getVectorL1Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading @@ -114,8 +113,7 @@ getVectorL2Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ) ); return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > Loading @@ -138,8 +136,7 @@ getVectorLpNorm( const Vector& v, const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ), 1.0 / p ); return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > Loading @@ -158,8 +155,7 @@ getVectorSum( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading Loading @@ -258,8 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading @@ -281,8 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1, auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ) ); return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > Loading @@ -308,8 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ), 1.0 / p ); return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > Loading @@ -328,8 +321,7 @@ getVectorDifferenceSum( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > Loading @@ -348,8 +340,7 @@ getScalarProduct( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return a + b; }; return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ); return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } } // namespace Benchmarks Loading
src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +3 −6 Original line number Diff line number Diff line Loading @@ -133,8 +133,7 @@ compare( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; auto reduction = [] __cuda_callable__ ( bool a, bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true ); return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); } template< typename Element, Loading @@ -150,8 +149,7 @@ containsValue( const Element* data, TNL_ASSERT_GE( size, (Index) 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; auto reduction = [] __cuda_callable__ ( bool a, bool b ) { return a || b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, false ); return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false ); } template< typename Element, Loading @@ -167,8 +165,7 @@ containsOnlyValue( const Element* data, TNL_ASSERT_GE( size, 0, "" ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; auto reduction = [] __cuda_callable__ ( bool a, bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true ); return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); } Loading
src/TNL/Containers/Algorithms/Multireduction.h +6 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,8 @@ #pragma once #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading @@ -32,6 +34,8 @@ struct Multireduction< Devices::Host > * the i-th value to be reduced from the j-th dataset * (i = 0,...,size-1; j = 0,...,n-1) * reduction: callable object representing the reduction operation * for example, it can be an instance of std::plus, std::logical_and, * std::logical_or etc. * size: the size of each dataset * n: number of datasets to be reduced * result: output array of size = n Loading Loading @@ -59,6 +63,8 @@ struct Multireduction< Devices::Cuda > * the i-th value to be reduced from the j-th dataset * (i = 0,...,size-1; j = 0,...,n-1) * reduction: callable object representing the reduction operation * for example, it can be an instance of std::plus, std::logical_and, * std::logical_or etc. * size: the size of each dataset * n: number of datasets to be reduced * hostResult: output array of size = n Loading
src/TNL/Containers/Algorithms/Reduction.h +5 −4 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ #pragma once #include <utility> // std::pair #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading @@ -33,7 +34,7 @@ struct Reduction< Devices::Host > typename DataFetcher > static Result reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); Loading @@ -43,7 +44,7 @@ struct Reduction< Devices::Host > typename DataFetcher > static std::pair< Index, Result > reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); }; Loading @@ -57,7 +58,7 @@ struct Reduction< Devices::Cuda > typename DataFetcher > static Result reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); Loading @@ -67,7 +68,7 @@ struct Reduction< Devices::Cuda > typename DataFetcher > static std::pair< Index, Result > reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ); }; Loading
src/TNL/Containers/Algorithms/Reduction.hpp +4 −4 Original line number Diff line number Diff line Loading @@ -45,7 +45,7 @@ template< typename Index, Result Reduction< Devices::Host >:: reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading @@ -137,7 +137,7 @@ template< typename Index, std::pair< Index, Result > Reduction< Devices::Host >:: reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading @@ -260,7 +260,7 @@ template< typename Index, Result Reduction< Devices::Cuda >:: reduce( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading Loading @@ -336,7 +336,7 @@ template< typename Index, std::pair< Index, Result > Reduction< Devices::Cuda >:: reduceWithArgument( const Index size, ReductionOperation& reduction, const ReductionOperation& reduction, DataFetcher& dataFetcher, const Result& zero ) { Loading