Merge branch 'develop' into mpi-explosive (84fce19a) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/ArrayOperations.h

+158 −138

Original line number	Diff line number	Diff line
		@@ -66,6 +66,12 @@ class ArrayOperations< Devices::Host >
		static bool compareMemory( const Element1* destination,
		const Element2* source,
		const Index size );

		/**
		 * Reports whether `value` occurs among the first `size` elements of `data`.
		 *
		 * Elements are compared with operator== and the per-element results are
		 * combined with a logical OR. An empty range (size == 0) yields false.
		 *
		 * `data` must not be null and `size` must be non-negative; both are
		 * checked with TNL assertions in the definition.
		 */
		template< typename Element,
		typename Index >
		static bool checkValue( const Element* data,
		const Index size,
		const Element& value );
		};

		template<>
		@@ -77,9 +83,6 @@ class ArrayOperations< Devices::Cuda >
		static void allocateMemory( Element*& data,
		const Index size );

		template< typename Element >
		static void freeMemory( Element* data );

		template< typename Element >
		__cuda_callable__
		static void setMemoryElement( Element* data,
		@@ -87,6 +90,9 @@ class ArrayOperations< Devices::Cuda >

		template< typename Element >
		__cuda_callable__
		static Element getMemoryElement( const Element* data );

		template< typename Element >
		static Element getMemoryElement( const Element* data );

		// TODO: does not make sense for CUDA - remove?
		@@ -114,6 +120,13 @@ class ArrayOperations< Devices::Cuda >
		static bool compareMemory( const Element1* destination,
		const Element2* source,
		const Index size );

		/**
		 * Reports whether `value` occurs among the first `size` elements of `data`.
		 *
		 * The search is delegated to Reduction< Devices::Cuda >::reduce.
		 * An empty range (size == 0) yields false.
		 *
		 * `data` must not be null and `size` must be non-negative; both are
		 * checked with TNL assertions in the definition.
		 *
		 * Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		 */
		template< typename Element,
		typename Index >
		static bool checkValue( const Element* data,
		const Index size,
		const Element& value );

		};

		template<>
		@@ -200,6 +213,13 @@ class ArrayOperations< Devices::MIC >
		static bool compareMemory( const Element1* destination,
		const Element2* source,
		const Index size );

		/**
		 * Placeholder for the value-presence check on MIC; not implemented.
		 *
		 * With HAVE_MIC defined, the definition triggers TNL_ASSERT( false, )
		 * and then returns false. Without HAVE_MIC it throws
		 * Exceptions::MICSupportMissing.
		 */
		template< typename Element,
		typename Index >
		static bool checkValue( const Element* data,
		const Index size,
		const Element& value );

		};

		template<>

src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h

+27 −0

Original line number	Diff line number	Diff line
		@@ -208,13 +208,40 @@ compareMemory( const Element1* destination,
		{
		TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
		#ifdef HAVE_CUDA
		//TODO: The parallel reduction on the CUDA device with different element types is needed.
		bool result = false;
		Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
		Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result );
		return result;
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		}

		/**
		 * Reports whether `value` occurs among the first `size` elements of `data`.
		 *
		 * The work is delegated to Reduction< Devices::Cuda >::reduce using a
		 * ParallelReductionCheckPresence operation configured with `value`.
		 *
		 * \param data  array to search; must not be null (asserted).
		 * \param size  number of elements to inspect; must be >= 0 (asserted).
		 * \param value value to look for, compared with operator==.
		 * \return true if some element equals `value`; false otherwise, and
		 *         always false for an empty array.
		 * \throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		 */
		template< typename Element,
		          typename Index >
		bool
		ArrayOperations< Devices::Cuda >::
		checkValue( const Element* data,
		            const Index size,
		            const Element& value )
		{
		   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
		   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
		#ifdef HAVE_CUDA
		   // An empty array cannot contain the value; skip the reduction entirely.
		   if( size == 0 ) return false;
		   bool result = false;
		   Algorithms::ParallelReductionCheckPresence< Element > reductionCheckPresence;
		   reductionCheckPresence.setValue( value );
		   // The presence check only reads the first input array, so no second
		   // array is supplied.
		   Reduction< Devices::Cuda >::reduce( reductionCheckPresence, size, data, nullptr, result );
		   return result;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}


		/****
		* Operations CUDA -> Host
		*/

src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h

+30 −12

Original line number	Diff line number	Diff line
		@@ -15,6 +15,8 @@

		#include <TNL/tnlConfig.h>
		#include <TNL/Containers/Algorithms/ArrayOperations.h>
		#include <TNL/Containers/Algorithms/Reduction.h>
		#include <TNL/Containers/Algorithms/ReductionOperations.h>

		namespace TNL {
		namespace Containers {
		@@ -128,20 +130,36 @@ compareMemory( const DestinationElement* destination,
		const SourceElement* source,
		const Index size )
		{
		if( std::is_same< DestinationElement, SourceElement >::value &&
		( std::is_fundamental< DestinationElement >::value \|\|
		std::is_pointer< DestinationElement >::value ) )
		{
		if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 )
		return false;
		TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );

		//TODO: The parallel reduction on the CUDA device with different element types is needed.
		bool result = false;
		Algorithms::ParallelReductionEqualities< DestinationElement, SourceElement > reductionEqualities;
		Reduction< Devices::Host >::reduce( reductionEqualities, size, destination, source, result );
		return result;
		}
		else
		for( Index i = 0; i < size; i ++ )
		if( ! ( destination[ i ] == source[ i ] ) )
		return false;
		return true;

		/**
		 * Reports whether `value` occurs among the first `size` elements of `data`.
		 *
		 * The work is delegated to Reduction< Devices::Host >::reduce using a
		 * ParallelReductionCheckPresence operation configured with `value`.
		 *
		 * \param data  array to search; must not be null (asserted).
		 * \param size  number of elements to inspect; must be >= 0 (asserted).
		 * \param value value to look for, compared with operator==.
		 * \return true if some element equals `value`; false otherwise, and
		 *         always false for an empty array.
		 */
		template< typename Element,
		          typename Index >
		bool
		ArrayOperations< Devices::Host >::
		checkValue( const Element* data,
		            const Index size,
		            const Element& value )
		{
		   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
		   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );

		   // An empty array cannot contain the value; skip the reduction entirely.
		   if( size == 0 ) return false;
		   bool result = false;
		   Algorithms::ParallelReductionCheckPresence< Element > reductionCheckPresence;
		   reductionCheckPresence.setValue( value );
		   // The presence check only reads the first input array, so no second
		   // array is supplied.
		   Reduction< Devices::Host >::reduce( reductionCheckPresence, size, data, nullptr, result );
		   return result;
		}


		#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

		extern template bool ArrayOperations< Devices::Host >::allocateMemory< char, int >( char*& data, const int size );

src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h

+19 −0

Original line number	Diff line number	Diff line
		@@ -215,6 +215,25 @@ compareMemory( const Element1* destination,
		#endif
		}

		/**
		 * Placeholder for the value-presence check on MIC; not implemented.
		 *
		 * \param data  array to search; must not be null (asserted).
		 * \param size  number of elements to inspect; must be >= 0 (asserted).
		 * \param value value to look for (currently unused).
		 * \return false when built with HAVE_MIC, after the unconditional
		 *         assertion below.
		 * \throws Exceptions::MICSupportMissing when built without HAVE_MIC.
		 */
		template< typename Element,
		          typename Index >
		bool
		ArrayOperations< Devices::MIC >::
		checkValue( const Element* data,
		            const Index size,
		            const Element& value )
		{
		   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
		   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
		#ifdef HAVE_MIC
		   // No MIC implementation exists yet: fail the assertion unconditionally,
		   // then fall back to reporting "not found".
		   TNL_ASSERT( false, );
		   return false;
		#else
		   throw Exceptions::MICSupportMissing();
		#endif
		}


		/****
		* Operations MIC -> Host
		*/

src/TNL/Containers/Algorithms/ReductionOperations.h

+30 −0

Original line number	Diff line number	Diff line
		@@ -579,6 +579,36 @@ protected:
		PType p;
		};

		/**
		 * Reduction operation that detects whether a given value is present in
		 * an array.
		 *
		 * The first reduction step ORs into the running result whether the
		 * element at `index` equals the stored value. Later reduction steps are
		 * those of ParallelReductionLogicalOr< Result >, which this class also
		 * inherits from.
		 *
		 * The value to look for has no initializer here; set it with setValue()
		 * before running the reduction.
		 *
		 * \tparam Data   element type of the searched array.
		 * \tparam Result type of the reduction result (bool by default).
		 */
		template< typename Data, typename Result = bool >
		class ParallelReductionCheckPresence : public ParallelReductionLogicalOr< Result >
		{
		   public:
		      using DataType1 = Data;
		      using DataType2 = Data;
		      using ResultType = Result;
		      using LaterReductionOperation = ParallelReductionLogicalOr< Result >;

		      /**
		       * Folds one element into the running result.
		       *
		       * \param result running result, updated in place.
		       * \param index  position of the element to test in `data1`.
		       * \param data1  array being searched.
		       * \param data2  unused by this operation.
		       */
		      template< typename Index >
		      __cuda_callable__ void
		      firstReduction( ResultType& result,
		                      const Index& index,
		                      const DataType1* data1,
		                      const DataType2* data2 )
		      {
		         result = result || ( data1[ index ] == value );
		      }

		      /** Stores a copy of the value that firstReduction() compares against. */
		      void setValue( const Data& v )
		      {
		         this->value = v;
		      }

		   protected:
		      // Value searched for; assigned through setValue().
		      Data value;
		};



		} // namespace Algorithms
		} // namespace Containers
		} // namespace TNL