Loading src/TNL/Containers/Algorithms/ArrayAssignment.h +0 −2 Original line number Diff line number Diff line Loading @@ -86,8 +86,6 @@ struct ArrayAssignment< Array, T, false > }; } // namespace Algorithms } // namespace Containers } // namespace TNL src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +16 −9 Original line number Diff line number Diff line Loading @@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination, copiedElements += copySize; } } template< typename Element1, typename Element2, typename Index > Loading @@ -205,8 +204,11 @@ compareMemory( const Element1* destination, { TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( destination[ i ] == source[ i ] ); }; auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true ); } template< typename Element, Loading @@ -219,10 +221,12 @@ containsValue( const Element* data, { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." 
); TNL_ASSERT_GE( size, 0, "" ); if( size == 0 ) return false; Algorithms::ParallelReductionContainsValue< Element > reductionContainsValue; reductionContainsValue.setValue( value ); return Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, nullptr ); auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); }; auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a |= b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a |= b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, false ); } template< typename Element, Loading @@ -236,9 +240,12 @@ containsOnlyValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); if( size == 0 ) return false; Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue; reductionContainsOnlyValue.setValue( value ); return Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, nullptr ); if( size == 0 ) return false; auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); }; auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; }; return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true ); } Loading src/TNL/Containers/Algorithms/CommonVectorOperations.h 0 → 100644 +79 −0 Original line number Diff line number Diff line /*************************************************************************** CommonVectorOperations.h - description ------------------- begin : Apr 12, 2019 copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once namespace TNL 
{ namespace Containers { namespace Algorithms { template< typename Device > struct CommonVectorOperations { using DeviceType = Device; template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMin( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorAbsMax( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorAbsMin( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL1Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = 
typename Vector1::RealType > static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); }; } // namespace Algorithms } // namespace Containers } // namespace TNL #include <TNL/Containers/Algorithms/CommonVectorOperations.hpp> src/TNL/Containers/Algorithms/CommonVectorOperations.hpp 0 → 100644 +375 −0 Original line number Diff line number Diff line /*************************************************************************** CommonVectorOperations.hpp - description ------------------- begin : Apr 12, 2019 copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Containers/Algorithms/CommonVectorOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> namespace TNL { namespace Containers { namespace Algorithms { template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorMax( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorMin( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorAbsMax( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorAbsMin( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorL1Norm( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorL2Norm( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::sqrt( Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ) ); } template< typename Device > template< typename Vector, typename ResultType, typename Scalar > ResultType CommonVectorOperations< Device >:: getVectorLpNorm( const Vector& v, const Scalar p ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; if( p == 1.0 ) return getVectorL1Norm< Vector, ResultType >( v ); if( p == 2.0 ) return getVectorL2Norm< Vector, ResultType >( v ); const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::pow( Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorSum( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); if( std::is_same< ResultType, bool >::value ) abort(); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::sqrt( Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ) ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > ResultType CommonVectorOperations< Device >:: getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." ); if( p == 1.0 ) return getVectorDifferenceL1Norm< Vector1, Vector2, ResultType >( v1, v2 ); if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::pow( Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getScalarProduct( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } } // namespace Algorithms } // namespace Containers } // namespace TNL src/TNL/Containers/Algorithms/CudaReductionKernel.h +188 −122 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/TNL/Containers/Algorithms/ArrayAssignment.h +0 −2 Original line number Diff line number Diff line Loading @@ -86,8 +86,6 @@ struct ArrayAssignment< Array, T, false > }; } // namespace Algorithms } // namespace Containers } // namespace TNL
src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +16 −9 Original line number Diff line number Diff line Loading @@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination,
      copiedElements += copySize;
   }
}

template< typename Element1,
          typename Element2,
          typename Index >
Loading @@ -205,8 +204,11 @@ compareMemory( const Element1* destination,
{
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
   // NOTE(review): the functor-object call and the lambda-based call below are
   // presumably the removed and added lines of this hunk, shown without +/-
   // markers -- confirm against the repository.
   Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
   return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );
   // fetch( i ) is true where both buffers hold equal values; partial results
   // are folded with AND, and `true` (neutral for AND) is passed as the last argument.
   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( destination[ i ] == source[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
   return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
}

template< typename Element,
Loading @@ -219,10 +221,12 @@ containsValue( const Element* data,
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
   // An empty range contains nothing.
   if( size == 0 ) return false;
   // NOTE(review): old functor-object form followed by the new lambda form,
   // presumably removed/added diff lines -- confirm.
   Algorithms::ParallelReductionContainsValue< Element > reductionContainsValue;
   reductionContainsValue.setValue( value );
   return Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, nullptr );
   // fetch( i ) is true where data[ i ] equals value; partial results are
   // folded with OR, and `false` (neutral for OR) is passed as the last argument.
   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); };
   auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a |= b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a |= b; };
   return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, false );
}

template< typename Element,
Loading @@ -236,9 +240,12 @@ containsOnlyValue( const Element* data,
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
   // An empty range is reported as NOT consisting only of `value`.
   if( size == 0 ) return false;
   // NOTE(review): old functor-object form; the size guard then appears a second
   // time before the lambda form, presumably removed/added diff lines -- confirm.
   Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue;
   reductionContainsOnlyValue.setValue( value );
   return Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, nullptr );
   if( size == 0 ) return false;
   // fetch( i ) is true where data[ i ] equals value; partial results are
   // folded with AND, and `true` (neutral for AND) is passed as the last argument.
   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); };
   auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
   return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
}
Loading
src/TNL/Containers/Algorithms/CommonVectorOperations.h 0 → 100644 +79 −0 Original line number Diff line number Diff line /*************************************************************************** CommonVectorOperations.h - description ------------------- begin : Apr 12, 2019 copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once namespace TNL { namespace Containers { namespace Algorithms { template< typename Device > struct CommonVectorOperations { using DeviceType = Device; template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMin( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorAbsMax( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorAbsMin( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL1Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static 
ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); }; } // namespace Algorithms } // namespace Containers } // namespace TNL #include <TNL/Containers/Algorithms/CommonVectorOperations.hpp>
src/TNL/Containers/Algorithms/CommonVectorOperations.hpp 0 → 100644 +375 −0 Original line number Diff line number Diff line /*************************************************************************** CommonVectorOperations.hpp - description ------------------- begin : Apr 12, 2019 copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Containers/Algorithms/CommonVectorOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> namespace TNL { namespace Containers { namespace Algorithms { template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorMax( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorMin( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorAbsMax( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorAbsMin( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorL1Norm( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorL2Norm( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::sqrt( Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ) ); } template< typename Device > template< typename Vector, typename ResultType, typename Scalar > ResultType CommonVectorOperations< Device >:: getVectorLpNorm( const Vector& v, const Scalar p ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." ); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; if( p == 1.0 ) return getVectorL1Norm< Vector, ResultType >( v ); if( p == 2.0 ) return getVectorL2Norm< Vector, ResultType >( v ); const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::pow( Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > template< typename Vector, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorSum( const Vector& v ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); if( std::is_same< ResultType, bool >::value ) abort(); using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::sqrt( Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ) ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > ResultType CommonVectorOperations< Device >:: getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." 
); if( p == 1.0 ) return getVectorDifferenceL1Norm< Vector1, Vector2, ResultType >( v1, v2 ); if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return std::pow( Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Device > template< typename Vector1, typename Vector2, typename ResultType > ResultType CommonVectorOperations< Device >:: getScalarProduct( const Vector1& v1, const Vector2& v2 ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." 
); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); using RealType = typename Vector1::RealType; using IndexType = typename Vector1::IndexType; const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } } // namespace Algorithms } // namespace Containers } // namespace TNL
src/TNL/Containers/Algorithms/CudaReductionKernel.h +188 −122 File changed.Preview size limit exceeded, changes collapsed. Show changes