Loading src/TNL/Algorithms/CudaReductionKernel.h +3 −3 Original line number Diff line number Diff line Loading @@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher // Copy result on CPU Result result; MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 ); MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 ); return result; } Loading Loading @@ -384,8 +384,8 @@ struct CudaReductionKernelLauncher //// // Copy result on CPU std::pair< Index, Result > result; MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 ); MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 ); return result; } Loading src/TNL/Algorithms/MemoryOperations.h +3 −3 Original line number Diff line number Diff line Loading @@ -10,6 +10,7 @@ #pragma once #include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Cuda/CudaCallable.h> Loading @@ -17,12 +18,11 @@ namespace TNL { namespace Algorithms { template< typename DestinationExecution > template< typename DestinationDevice > struct MemoryOperations; // TODO: change "void" to "Execution::Sequential" template<> struct MemoryOperations< void > struct MemoryOperations< Devices::Sequential > { template< typename Element > __cuda_callable__ Loading src/TNL/Algorithms/MemoryOperationsHost.hpp +3 −3 Original line number Diff line number Diff line Loading @@ -93,7 +93,7 @@ copyFromIterator( DestinationElement* destination, SourceIterator first, SourceIterator last ) { MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last ); MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last ); } template< typename DestinationElement, Loading Loading @@ -137,7 +137,7 @@ containsValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a match return MemoryOperations< void >::containsValue( data, size, value ); return MemoryOperations< Devices::Sequential >::containsValue( data, size, value ); } } Loading @@ -159,7 +159,7 @@ containsOnlyValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a mismatch return MemoryOperations< void >::containsOnlyValue( data, size, value ); return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value ); } } Loading src/TNL/Algorithms/MemoryOperationsSequential.hpp +8 −8 Original line number Diff line number Diff line Loading @@ -18,7 +18,7 @@ namespace Algorithms { template< typename Element > __cuda_callable__ void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: setElement( Element* data, const Element& value ) { Loading @@ -28,7 +28,7 @@ setElement( Element* data, template< typename Element > __cuda_callable__ Element MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: getElement( const Element* data ) { return *data; Loading @@ -37,7 +37,7 @@ getElement( const Element* data ) template< typename Element, typename Index > __cuda_callable__ void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: set( Element* data, const Element& value, const Index size ) Loading @@ -51,7 +51,7 @@ template< typename DestinationElement, typename Index > __cuda_callable__ void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: copy( DestinationElement* destination, const SourceElement* source, const Index size ) Loading @@ -64,7 +64,7 @@ template< typename DestinationElement, typename Index, typename SourceIterator > void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, Loading @@ -82,7 +82,7 @@ template< typename Element1, typename Index > __cuda_callable__ bool MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: compare( const Element1* destination, const Element2* source, const Index size ) Loading @@ -97,7 +97,7 @@ template< typename Element, typename Index > __cuda_callable__ bool MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: containsValue( const Element* data, const Index size, const Element& value ) Loading @@ -116,7 +116,7 @@ template< typename Element, typename Index > __cuda_callable__ bool MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: containsOnlyValue( const Element* data, const Index size, const Element& value ) Loading src/TNL/Algorithms/Multireduction.h +30 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. #include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading @@ -23,6 +24,35 @@ namespace Algorithms { template< typename Device > struct Multireduction; template<> struct Multireduction< Devices::Sequential > { /** * Parameters: * zero: starting value for reduction * dataFetcher: callable object such that `dataFetcher( i, j )` yields * the i-th value to be reduced from the j-th dataset * (i = 0,...,size-1; j = 0,...,n-1) * reduction: callable object representing the reduction operation * for example, it can be an instance of std::plus, std::logical_and, * std::logical_or etc. * size: the size of each dataset * n: number of datasets to be reduced * result: output array of size = n */ template< typename Result, typename DataFetcher, typename Reduction, typename Index > static constexpr void reduce( const Result zero, DataFetcher dataFetcher, const Reduction reduction, const Index size, const int n, Result* result ); }; template<> struct Multireduction< Devices::Host > { Loading Loading
src/TNL/Algorithms/CudaReductionKernel.h +3 −3 Original line number Diff line number Diff line Loading @@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher // Copy result on CPU Result result; MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 ); MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 ); return result; } Loading Loading @@ -384,8 +384,8 @@ struct CudaReductionKernelLauncher //// // Copy result on CPU std::pair< Index, Result > result; MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 ); MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 ); return result; } Loading
src/TNL/Algorithms/MemoryOperations.h +3 −3 Original line number Diff line number Diff line Loading @@ -10,6 +10,7 @@ #pragma once #include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Cuda/CudaCallable.h> Loading @@ -17,12 +18,11 @@ namespace TNL { namespace Algorithms { template< typename DestinationExecution > template< typename DestinationDevice > struct MemoryOperations; // TODO: change "void" to "Execution::Sequential" template<> struct MemoryOperations< void > struct MemoryOperations< Devices::Sequential > { template< typename Element > __cuda_callable__ Loading
src/TNL/Algorithms/MemoryOperationsHost.hpp +3 −3 Original line number Diff line number Diff line Loading @@ -93,7 +93,7 @@ copyFromIterator( DestinationElement* destination, SourceIterator first, SourceIterator last ) { MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last ); MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last ); } template< typename DestinationElement, Loading Loading @@ -137,7 +137,7 @@ containsValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a match return MemoryOperations< void >::containsValue( data, size, value ); return MemoryOperations< Devices::Sequential >::containsValue( data, size, value ); } } Loading @@ -159,7 +159,7 @@ containsOnlyValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a mismatch return MemoryOperations< void >::containsOnlyValue( data, size, value ); return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value ); } } Loading
src/TNL/Algorithms/MemoryOperationsSequential.hpp +8 −8 Original line number Diff line number Diff line Loading @@ -18,7 +18,7 @@ namespace Algorithms { template< typename Element > __cuda_callable__ void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: setElement( Element* data, const Element& value ) { Loading @@ -28,7 +28,7 @@ setElement( Element* data, template< typename Element > __cuda_callable__ Element MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: getElement( const Element* data ) { return *data; Loading @@ -37,7 +37,7 @@ getElement( const Element* data ) template< typename Element, typename Index > __cuda_callable__ void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: set( Element* data, const Element& value, const Index size ) Loading @@ -51,7 +51,7 @@ template< typename DestinationElement, typename Index > __cuda_callable__ void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: copy( DestinationElement* destination, const SourceElement* source, const Index size ) Loading @@ -64,7 +64,7 @@ template< typename DestinationElement, typename Index, typename SourceIterator > void MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, Loading @@ -82,7 +82,7 @@ template< typename Element1, typename Index > __cuda_callable__ bool MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: compare( const Element1* destination, const Element2* source, const Index size ) Loading @@ -97,7 +97,7 @@ template< typename Element, typename Index > __cuda_callable__ bool MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: containsValue( const Element* data, const Index size, const Element& value ) Loading @@ -116,7 +116,7 @@ template< typename Element, typename Index > __cuda_callable__ bool MemoryOperations< void >:: MemoryOperations< Devices::Sequential >:: containsOnlyValue( const Element* data, const Index size, const Element& value ) Loading
src/TNL/Algorithms/Multireduction.h +30 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. #include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading @@ -23,6 +24,35 @@ namespace Algorithms { template< typename Device > struct Multireduction; template<> struct Multireduction< Devices::Sequential > { /** * Parameters: * zero: starting value for reduction * dataFetcher: callable object such that `dataFetcher( i, j )` yields * the i-th value to be reduced from the j-th dataset * (i = 0,...,size-1; j = 0,...,n-1) * reduction: callable object representing the reduction operation * for example, it can be an instance of std::plus, std::logical_and, * std::logical_or etc. * size: the size of each dataset * n: number of datasets to be reduced * result: output array of size = n */ template< typename Result, typename DataFetcher, typename Reduction, typename Index > static constexpr void reduce( const Result zero, DataFetcher dataFetcher, const Reduction reduction, const Index size, const int n, Result* result ); }; template<> struct Multireduction< Devices::Host > { Loading