Added Devices::Sequential and corresponding specializations in TNL::Algorithms (7756e2d0) · Commits · TNL / tnl-dev

src/TNL/Algorithms/CudaReductionKernel.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher

		// Copy result on CPU
		Result result;
		MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
		MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 );
		return result;
		}

		@@ -384,8 +384,8 @@ struct CudaReductionKernelLauncher
		////
		// Copy result on CPU
		std::pair< Index, Result > result;
		MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
		MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
		MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
		MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 );
		return result;
		}

src/TNL/Algorithms/MemoryOperations.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -10,6 +10,7 @@

		#pragma once

		#include <TNL/Devices/Sequential.h>
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Cuda/CudaCallable.h>
		@@ -17,12 +18,11 @@
		namespace TNL {
		namespace Algorithms {

		template< typename DestinationExecution >
		template< typename DestinationDevice >
		struct MemoryOperations;

		// TODO: change "void" to "Execution::Sequential"
		template<>
		struct MemoryOperations< void >
		struct MemoryOperations< Devices::Sequential >
		{
		template< typename Element >
		__cuda_callable__

src/TNL/Algorithms/MemoryOperationsHost.hpp

+3 −3

Original line number	Diff line number	Diff line
		@@ -93,7 +93,7 @@ copyFromIterator( DestinationElement* destination,
		SourceIterator first,
		SourceIterator last )
		{
		MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last );
		MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last );
		}

		template< typename DestinationElement,
		@@ -137,7 +137,7 @@ containsValue( const Element* data,
		}
		else {
		// sequential algorithm can return as soon as it finds a match
		return MemoryOperations< void >::containsValue( data, size, value );
		return MemoryOperations< Devices::Sequential >::containsValue( data, size, value );
		}
		}

		@@ -159,7 +159,7 @@ containsOnlyValue( const Element* data,
		}
		else {
		// sequential algorithm can return as soon as it finds a mismatch
		return MemoryOperations< void >::containsOnlyValue( data, size, value );
		return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value );
		}
		}

src/TNL/Algorithms/MemoryOperationsSequential.hpp

+8 −8

Original line number	Diff line number	Diff line
		@@ -18,7 +18,7 @@ namespace Algorithms {
		template< typename Element >
		__cuda_callable__
		void
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		setElement( Element* data,
		const Element& value )
		{
		@@ -28,7 +28,7 @@ setElement( Element* data,
		template< typename Element >
		__cuda_callable__
		Element
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		getElement( const Element* data )
		{
		return *data;
		@@ -37,7 +37,7 @@ getElement( const Element* data )
		template< typename Element, typename Index >
		__cuda_callable__
		void
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		set( Element* data,
		const Element& value,
		const Index size )
		@@ -51,7 +51,7 @@ template< typename DestinationElement,
		typename Index >
		__cuda_callable__
		void
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		copy( DestinationElement* destination,
		const SourceElement* source,
		const Index size )
		@@ -64,7 +64,7 @@ template< typename DestinationElement,
		typename Index,
		typename SourceIterator >
		void
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		copyFromIterator( DestinationElement* destination,
		Index destinationSize,
		SourceIterator first,
		@@ -82,7 +82,7 @@ template< typename Element1,
		typename Index >
		__cuda_callable__
		bool
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		compare( const Element1* destination,
		const Element2* source,
		const Index size )
		@@ -97,7 +97,7 @@ template< typename Element,
		typename Index >
		__cuda_callable__
		bool
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		containsValue( const Element* data,
		const Index size,
		const Element& value )
		@@ -116,7 +116,7 @@ template< typename Element,
		typename Index >
		__cuda_callable__
		bool
		MemoryOperations< void >::
		MemoryOperations< Devices::Sequential >::
		containsOnlyValue( const Element* data,
		const Index size,
		const Element& value )

src/TNL/Algorithms/Multireduction.h

+30 −0

Original line number	Diff line number	Diff line
		@@ -14,6 +14,7 @@

		#include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc.

		#include <TNL/Devices/Sequential.h>
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>

		@@ -23,6 +24,35 @@ namespace Algorithms {
		template< typename Device >
		struct Multireduction;

		// Explicit specialization of Multireduction for the Devices::Sequential device tag.
		template<>
		struct Multireduction< Devices::Sequential >
		{
		/**
		* Reduces n datasets, each containing `size` values, and stores one
		* reduced value per dataset in the output array.
		*
		* Only the declaration is given here; the definition is not visible in
		* this part of the header.
		*
		* Parameters:
		* zero: starting value for reduction
		* dataFetcher: callable object such that `dataFetcher( i, j )` yields
		* the i-th value to be reduced from the j-th dataset
		* (i = 0,...,size-1; j = 0,...,n-1)
		* reduction: callable object representing the reduction operation
		* for example, it can be an instance of std::plus, std::logical_and,
		* std::logical_or etc.
		* size: the size of each dataset
		* n: number of datasets to be reduced
		* result: output array of size = n
		*/
		template< typename Result,
		typename DataFetcher,
		typename Reduction,
		typename Index >
		static constexpr void
		reduce( const Result zero,
		DataFetcher dataFetcher,
		const Reduction reduction,
		const Index size,
		// NOTE(review): n is a plain int while size uses the Index template type -- confirm this is intentional.
		const int n,
		Result* result );
		};

		template<>
		struct Multireduction< Devices::Host >
		{