Commit 57db358c authored by Jakub Klinkovský

Split ArrayOperations into MemoryOperations and MultiDeviceMemoryOperations

This will be necessary to avoid code bloat with more than 2 devices
(execution types).
parent 986e25fc
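
The point of the split: the old ArrayOperations< DestinationDevice, SourceDevice > template needed a specialization for every ordered pair of devices, so each new device (execution type) grows the code quadratically. After this commit, per-device operations live in MemoryOperations< Execution > (one specialization per device) and only the genuinely cross-device copy/compare pairs remain in MultiDeviceMemoryOperations. A minimal compilable sketch of the layout, with a hypothetical Host tag instead of TNL's real device types and a plain loop standing in for the real transfer logic:

// Hypothetical stand-in tag; TNL's real Devices::Host/Devices::Cuda are not used here.
#include <algorithm>

struct Host {};

// One specialization per device/execution type: N devices -> N specializations.
template< typename Execution >
struct MemoryOperations;

template<>
struct MemoryOperations< Host >
{
   template< typename Element, typename Index >
   static void set( Element* data, const Element& value, const Index size )
   {
      std::fill( data, data + size, value );
   }
};

// Cross-device transfers are isolated here: a generic primary template plus a
// few targeted specializations replaces the old quadratic family of
// ArrayOperations< Dest, Src > specializations.
template< typename DestinationDevice, typename SourceDevice = DestinationDevice >
struct MultiDeviceMemoryOperations
{
   template< typename DestinationElement, typename SourceElement, typename Index >
   static void copy( DestinationElement* destination, const SourceElement* source, const Index size )
   {
      // host-accessible fallback for the sketch only
      for( Index i = 0; i < size; i++ )
         destination[ i ] = source[ i ];
   }
};

int main()
{
   double a[ 4 ], b[ 4 ];
   MemoryOperations< Host >::set( a, 3.14, 4 );
   MultiDeviceMemoryOperations< Host >::copy( b, a, 4 );
}
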
+4 −3
@@ -11,7 +11,8 @@
#pragma once

#include <TNL/TypeTraits.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Containers/Algorithms/MemoryOperations.h>
+#include <TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h>

namespace TNL {
namespace Containers {
@@ -39,7 +40,7 @@ struct ArrayAssignment< Array, T, true >
   {
      TNL_ASSERT_EQ( a.getSize(), t.getSize(), "The sizes of the arrays must be equal." );
      if( t.getSize() > 0 ) // we allow even assignment of empty arrays
-         ArrayOperations< typename Array::DeviceType, typename T::DeviceType >::template
+         MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template
            copy< typename Array::ValueType, typename T::ValueType, typename Array::IndexType >
            ( a.getArrayData(), t.getArrayData(), t.getSize() );
   }
@@ -60,7 +61,7 @@ struct ArrayAssignment< Array, T, false >
   static void assign( Array& a, const T& t )
   {
      TNL_ASSERT_FALSE( a.empty(), "Cannot assign value to empty array." );
-      ArrayOperations< typename Array::DeviceType >::template
+      MemoryOperations< typename Array::DeviceType >::template
         set< typename Array::ValueType, typename Array::IndexType >
         ( a.getArrayData(), ( typename Array::ValueType ) t, a.getSize() );
   }
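
Both branches above are reached through Array's assignment operators. A hedged usage sketch (TNL's public Array API; not a tested snippet from this commit):

#include <TNL/Containers/Array.h>

using namespace TNL;

int main()
{
   Containers::Array< double, Devices::Host > host( 10 );
   host = 1.0;     // value branch: MemoryOperations< Devices::Host >::set
   Containers::Array< double, Devices::Cuda > device;
   device = host;  // array branch: MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy
}
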
+4 −4
@@ -17,7 +17,7 @@
#include <TNL/Cuda/DeviceInfo.h>
#include <TNL/Cuda/SharedMemory.h>
#include <TNL/Containers/Algorithms/CudaReductionBuffer.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

namespace TNL {
@@ -352,7 +352,7 @@ struct CudaReductionKernelLauncher

      // Copy result on CPU
      Result result;
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
+      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
      return result;
   }

@@ -385,8 +385,8 @@ struct CudaReductionKernelLauncher
      ////
      // Copy result on CPU
      std::pair< Index, Result > result;
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
+      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
+      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
      return result;
   }

+11 −47
/***************************************************************************
-                          ArrayOperations.h  -  description
+                          MemoryOperations.h  -  description
                             -------------------
    begin                : Jul 15, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
@@ -12,18 +12,18 @@

#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Cuda/CudaCallable.h>

namespace TNL {
namespace Containers {
namespace Algorithms {

-template< typename DestinationDevice,
-          typename SourceDevice = DestinationDevice >
-struct ArrayOperations;
+template< typename DestinationExecution >
+struct MemoryOperations;

// TODO: establish the concept of a "void device" for static computations in the whole TNL
// TODO: change "void" to "Execution::Sequential"
template<>
-struct ArrayOperations< void >
+struct MemoryOperations< void >
{
   template< typename Element >
   __cuda_callable__
@@ -80,7 +80,7 @@ struct ArrayOperations< void >
};

template<>
-struct ArrayOperations< Devices::Host >
+struct MemoryOperations< Devices::Host >
{
   template< typename Element >
   static void setElement( Element* data,
@@ -130,7 +130,7 @@ struct ArrayOperations< Devices::Host >
};

template<>
-struct ArrayOperations< Devices::Cuda >
+struct MemoryOperations< Devices::Cuda >
{
   template< typename Element >
   static void setElement( Element* data,
@@ -179,46 +179,10 @@ struct ArrayOperations< Devices::Cuda >
                                  const Element& value );
};

-template<>
-struct ArrayOperations< Devices::Cuda, Devices::Host >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static bool compare( const DestinationElement* destination,
-                        const SourceElement* source,
-                        const Index size );
-};
-
-template<>
-struct ArrayOperations< Devices::Host, Devices::Cuda >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename Element1,
-             typename Element2,
-             typename Index >
-   static bool compare( const Element1* destination,
-                        const Element2* source,
-                        const Index size );
-};

} // namespace Algorithms
} // namespace Containers
} // namespace TNL

-#include <TNL/Containers/Algorithms/ArrayOperationsStatic.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsHost.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsCuda.hpp>
+#include <TNL/Containers/Algorithms/MemoryOperationsSequential.hpp>
+#include <TNL/Containers/Algorithms/MemoryOperationsHost.hpp>
+#include <TNL/Containers/Algorithms/MemoryOperationsCuda.hpp>
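
The two deleted cross-device specializations are not dropped: they move, renamed, into the new MultiDeviceMemoryOperations.h header included above, which this view does not show. The following interface is a reconstruction inferred from the deleted declarations and from this commit's call sites, so the actual header may differ in detail:

// Reconstruction (not the committed file): interface of the new
// MultiDeviceMemoryOperations.h, inferred from the declarations deleted above
// and from call sites such as MultiDeviceMemoryOperations< void, Devices::Cuda >.
namespace TNL {
namespace Containers {
namespace Algorithms {

template< typename DestinationDevice,
          typename SourceDevice = DestinationDevice >
struct MultiDeviceMemoryOperations
{
   template< typename DestinationElement,
             typename SourceElement,
             typename Index >
   static void copy( DestinationElement* destination,
                     const SourceElement* source,
                     const Index size );

   template< typename DestinationElement,
             typename SourceElement,
             typename Index >
   static bool compare( const DestinationElement* destination,
                        const SourceElement* source,
                        const Index size );
};

} // namespace Algorithms
} // namespace Containers
} // namespace TNL
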
+160 −0
/***************************************************************************
                          MemoryOperationsCuda.hpp  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <iostream>
#include <memory>  // std::unique_ptr
#include <stdexcept>

#include <TNL/Containers/Algorithms/MemoryOperations.h>
#include <TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h>
#include <TNL/ParallelFor.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

namespace TNL {
namespace Containers {
namespace Algorithms {

template< typename Element >
void
MemoryOperations< Devices::Cuda >::
setElement( Element* data,
            const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
   MemoryOperations< Devices::Cuda >::set( data, value, 1 );
}

template< typename Element >
Element
MemoryOperations< Devices::Cuda >::
getElement( const Element* data )
{
   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
   Element result;
   MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
   return result;
}

template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::
set( Element* data,
     const Element& value,
     const Index size )
{
   if( size == 0 ) return;
   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
   auto kernel = [data, value] __cuda_callable__ ( Index i )
   {
      data[ i ] = value;
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}

template< typename DestinationElement,
          typename SourceElement,
          typename Index >
void
MemoryOperations< Devices::Cuda >::
copy( DestinationElement* destination,
      const SourceElement* source,
      const Index size )
{
   if( size == 0 ) return;
   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );

   // our ParallelFor kernel is faster than cudaMemcpy
   auto kernel = [destination, source] __cuda_callable__ ( Index i )
   {
      destination[ i ] = source[ i ];
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}
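
Beyond the speed remark in the comment, the kernel has a semantic edge over cudaMemcpy: DestinationElement and SourceElement may be different types, and the element-wise assignment converts on the fly, which a raw byte copy cannot.
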

template< typename DestinationElement,
          typename Index,
          typename SourceIterator >
void
MemoryOperations< Devices::Cuda >::
copyFromIterator( DestinationElement* destination,
                  Index destinationSize,
                  SourceIterator first,
                  SourceIterator last )
{
   using BaseType = typename std::remove_cv< DestinationElement >::type;
   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
   Index copiedElements = 0;
   while( copiedElements < destinationSize && first != last ) {
      Index i = 0;
      while( i < Cuda::getTransferBufferSize() && first != last )
         buffer[ i++ ] = *first++;
      MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
      copiedElements += i;
   }
   if( first != last )
      throw std::length_error( "Source iterator is larger than the destination array." );
}
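
Since the source iterators are host-side, copyFromIterator stages the transfer through a host buffer of Cuda::getTransferBufferSize() elements: each pass fills the buffer sequentially, ships it to the device with one MultiDeviceMemoryOperations< Devices::Cuda, void >::copy, and repeats until the destination is full or the range is exhausted; leftover source elements then indicate an oversized range and raise the std::length_error.
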

template< typename Element1,
          typename Element2,
          typename Index >
bool
MemoryOperations< Devices::Cuda >::
compare( const Element1* destination,
         const Element2* source,
         const Index size )
{
   if( size == 0 ) return true;
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
}

template< typename Element,
          typename Index >
bool
MemoryOperations< Devices::Cuda >::
containsValue( const Element* data,
               const Index size,
               const Element& value )
{
   if( size == 0 ) return false;
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, (Index) 0, "" );

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
   return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false );
}

template< typename Element,
          typename Index >
bool
MemoryOperations< Devices::Cuda >::
containsOnlyValue( const Element* data,
                   const Index size,
                   const Element& value )
{
   if( size == 0 ) return false;
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
}

} // namespace Algorithms
} // namespace Containers
} // namespace TNL
+13 −14
/***************************************************************************
-                          ArrayOperationsHost.hpp  -  description
+                          MemoryOperationsHost.hpp  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
@@ -14,8 +14,8 @@
#include <stdexcept>
#include <algorithm>  // std::copy, std::equal

+#include <TNL/Containers/Algorithms/MemoryOperations.h>
#include <TNL/ParallelFor.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
#include <TNL/Containers/Algorithms/Reduction.h>

namespace TNL {
@@ -24,7 +24,7 @@ namespace Algorithms {

template< typename Element >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
setElement( Element* data,
            const Element& value )
{
@@ -34,7 +34,7 @@ setElement( Element* data,

template< typename Element >
Element
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
getElement( const Element* data )
{
   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
@@ -43,7 +43,7 @@ getElement( const Element* data )

template< typename Element, typename Index >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
set( Element* data,
     const Element& value,
     const Index size )
@@ -61,7 +61,7 @@ template< typename DestinationElement,
          typename SourceElement,
          typename Index >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
copy( DestinationElement* destination,
      const SourceElement* source,
      const Index size )
@@ -88,21 +88,20 @@ template< typename DestinationElement,
          typename Index,
          typename SourceIterator >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
copyFromIterator( DestinationElement* destination,
                  Index destinationSize,
                  SourceIterator first,
                  SourceIterator last )
{
-   ArrayOperations< void >::copyFromIterator( destination, destinationSize, first, last );
+   MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last );
}


template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
compare( const DestinationElement* destination,
         const SourceElement* source,
         const Index size )
@@ -124,7 +123,7 @@ compare( const DestinationElement* destination,
template< typename Element,
          typename Index >
bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
containsValue( const Element* data,
               const Index size,
               const Element& value )
@@ -139,14 +138,14 @@ containsValue( const Element* data,
   }
   else {
      // sequential algorithm can return as soon as it finds a match
-      return ArrayOperations< void >::containsValue( data, size, value );
+      return MemoryOperations< void >::containsValue( data, size, value );
   }
}

template< typename Element,
          typename Index >
bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
containsOnlyValue( const Element* data,
                   const Index size,
                   const Element& value )
@@ -161,7 +160,7 @@ containsOnlyValue( const Element* data,
   }
   else {
      // sequential algorithm can return as soon as it finds a mismatch
-      return ArrayOperations< void >::containsOnlyValue( data, size, value );
+      return MemoryOperations< void >::containsOnlyValue( data, size, value );
   }
}
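
The hunk context hides the parallel branch of both functions, but the pattern matches the includes above: large arrays are presumably scanned with Reduction< Devices::Host >, while small arrays fall back to the sequential MemoryOperations< void > variant, which can return at the first match (or mismatch) instead of paying the parallel-reduction overhead.
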
