Fixed math functions in reduction-operations.h (664c84c0) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/reduction-operations.h

+17 −187

Original line number	Diff line number	Diff line
		@@ -10,186 +10,16 @@

		#pragma once

		#include <cmath>
		#include <TNL/Constants.h>
		#include <TNL/Math.h>
		#include <TNL/Devices/Cuda.h>

		#ifdef HAVE_CUDA
		#include <cuda.h>
		#include <TNL/Math.h>
		#endif


		namespace TNL {
		namespace Containers {
		namespace Algorithms {

		#ifdef HAVE_CUDA
		/***
		* This function returns minimum of two numbers stored on the device.
		* TODO: Make it tnlMin, tnlMax etc.
		*/
		template< class T > __device__ T tnlCudaMin( const T& a,
		const T& b )
		{
		return a < b ? a : b;
		}

		__device__ inline int tnlCudaMin( const int& a,
		const int& b )
		{
		return min( a, b );
		}

		__device__ inline float tnlCudaMin( const float& a,
		const float& b )
		{
		return fminf( a, b );
		}

		__device__ inline double tnlCudaMin( const double& a,
		const double& b )
		{
		return fmin( a, b );
		}

		template< class T > __device__ T tnlCudaMin( volatile const T& a,
		volatile const T& b )
		{
		return a < b ? a : b;
		}

		__device__ inline int tnlCudaMin( volatile const int& a,
		volatile const int& b )
		{
		return min( a, b );
		}

		__device__ inline float tnlCudaMin( volatile const float& a,
		volatile const float& b )
		{
		return fminf( a, b );
		}

		__device__ inline double tnlCudaMin( volatile const double& a,
		volatile const double& b )
		{
		return fmin( a, b );
		}


		/***
		* This function returns maximum of two numbers stored on the device.
		*/
		template< class T > __device__ T tnlCudaMax( const T& a,
		const T& b )
		{
		return a > b ? a : b;
		}

		__device__ inline int tnlCudaMax( const int& a,
		const int& b )
		{
		return max( a, b );
		}

		__device__ inline float tnlCudaMax( const float& a,
		const float& b )
		{
		return fmaxf( a, b );
		}

		__device__ inline double tnlCudaMax( const double& a,
		const double& b )
		{
		return fmax( a, b );
		}

		template< class T > __device__ T tnlCudaMax( volatile const T& a,
		volatile const T& b )
		{
		return a > b ? a : b;
		}

		__device__ inline int tnlCudaMax( volatile const int& a,
		volatile const int& b )
		{
		return max( a, b );
		}

		__device__ inline float tnlCudaMax( volatile const float& a,
		volatile const float& b )
		{
		return fmaxf( a, b );
		}

		__device__ inline double tnlCudaMax( volatile const double& a,
		volatile const double& b )
		{
		return fmax( a, b );
		}

		/***
		* This function returns absolute value of given number on the device.
		*/
		__device__ inline int tnlCudaAbs( const int& a )
		{
		return ::abs( a );
		}

		__device__ inline long int tnlCudaAbs( const long int& a )
		{
		return ::abs( a );
		}

		__device__ inline float tnlCudaAbs( const float& a )
		{
		return ::abs( a );
		}

		__device__ inline double tnlCudaAbs( const double& a )
		{
		return ::abs( a );
		}

		__device__ inline long double tnlCudaAbs( const long double& a )
		{
		return ::abs( ( double ) a );
		}

		__device__ inline int tnlCudaAbs( volatile const int& a )
		{
		return ::abs( a );
		}

		__device__ inline long int tnlCudaAbs( volatile const long int& a )
		{
		return ::abs( a );
		}

		__device__ inline float tnlCudaAbs( volatile const float& a )
		{
		return ::abs( a );
		}

		__device__ inline double tnlCudaAbs( volatile const double& a )
		{
		return ::abs( a );
		}

		__device__ inline long double tnlCudaAbs( volatile const long double& a )
		{
		return ::abs( ( double ) a );
		}


		template< typename Type1, typename Type2 >
		__device__ Type1 tnlCudaPow( const Type1& x, const Type2& power )
		{
		return ( Type1 ) ::pow( ( double ) x, ( double ) power );
		}
		#endif

		template< typename Real, typename Index >
		class tnlParallelReductionSum
		{
		@@ -256,19 +86,19 @@ class tnlParallelReductionMin
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMin( result, data1[ index ] );
		result = TNL::min( result, data1[ index ] );
		}

		__cuda_callable__ void commonReductionOnDevice( ResultType& result,
		const ResultType& data )
		{
		result = tnlCudaMin( result, data );
		result = TNL::min( result, data );
		};

		__cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
		volatile const ResultType& data )
		{
		result = tnlCudaMin( result, data );
		result = TNL::min( result, data );
		};
		};

		@@ -297,19 +127,19 @@ class tnlParallelReductionMax
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMax( result, data1[ index ] );
		result = TNL::max( result, data1[ index ] );
		}

		__cuda_callable__ void commonReductionOnDevice( ResultType& result,
		const ResultType& data )
		{
		result = tnlCudaMax( result, data );
		result = TNL::max( result, data );
		};

		__cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
		volatile const ResultType& data )
		{
		result = tnlCudaMax( result, data );
		result = TNL::max( result, data );
		};
		};

		@@ -421,7 +251,7 @@ class tnlParallelReductionAbsSum : public tnlParallelReductionSum< Real, Index >
		const RealType* data1,
		const RealType* data2 )
		{
		result += tnlCudaAbs( data1[ index ] );
		result += TNL::abs( data1[ index ] );
		}
		};

		@@ -450,7 +280,7 @@ class tnlParallelReductionAbsMin : public tnlParallelReductionMin< Real, Index >
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMin( result, tnlCudaAbs( data1[ index ] ) );
		result = TNL::min( result, TNL::abs( data1[ index ] ) );
		}
		};

		@@ -479,7 +309,7 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index >
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMax( result, tnlCudaAbs( data1[ index ] ) );
		result = TNL::max( result, TNL::abs( data1[ index ] ) );
		}
		};

		@@ -545,7 +375,7 @@ class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index >
		const RealType* data1,
		const RealType* data2 )
		{
		result += tnlCudaPow( tnlCudaAbs( data1[ index ] ), p );
		result += TNL::pow( TNL::abs( data1[ index ] ), p );
		}

		protected:
		@@ -694,7 +524,7 @@ class tnlParallelReductionDiffMin : public tnlParallelReductionMin< Real, Index
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMin( result, data1[ index ] - data2[ index ] );
		result = TNL::min( result, data1[ index ] - data2[ index ] );
		}
		};

		@@ -723,7 +553,7 @@ class tnlParallelReductionDiffMax : public tnlParallelReductionMax< Real, Index
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMax( result, data1[ index ] - data2[ index ] );
		result = TNL::max( result, data1[ index ] - data2[ index ] );
		}
		};

		@@ -752,7 +582,7 @@ class tnlParallelReductionDiffAbsSum : public tnlParallelReductionMax< Real, Ind
		const RealType* data1,
		const RealType* data2 )
		{
		result += tnlCudaAbs( data1[ index ] - data2[ index ] );
		result += TNL::abs( data1[ index ] - data2[ index ] );
		}
		};

		@@ -781,7 +611,7 @@ class tnlParallelReductionDiffAbsMin : public tnlParallelReductionMin< Real, Ind
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMin( result, tnlCudaAbs( data1[ index ] - data2[ index ] ) );
		result = TNL::min( result, TNL::abs( data1[ index ] - data2[ index ] ) );
		}
		};

		@@ -810,7 +640,7 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind
		const RealType* data1,
		const RealType* data2 )
		{
		result = tnlCudaMax( result, tnlCudaAbs( data1[ index ] - data2[ index ] ) );
		result = TNL::max( result, TNL::abs( data1[ index ] - data2[ index ] ) );
		}
		};

		@@ -879,7 +709,7 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind
		const RealType* data1,
		const RealType* data2 )
		{
		result += tnlCudaPow( tnlCudaAbs( data1[ index ] - data2[ index ] ), p );
		result += TNL::pow( TNL::abs( data1[ index ] - data2[ index ] ), p );
		}

		protected:

src/TNL/Math.h

+137 −25

Original line number	Diff line number	Diff line
		@@ -11,18 +11,76 @@
		#pragma once

		#include <cmath>
		#include <stdlib.h>
		#include <type_traits>

		#include <TNL/Devices/Cuda.h>

		#ifdef HAVE_CUDA
		#include <cuda.h>
		#endif

		namespace TNL {

		template< typename T1, typename T2 >
		using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >;

		/***
		* This function returns minimum of two numbers.
		* Specializations use the functions defined in the CUDA's math_functions.h
		* in CUDA device code and STL functions otherwise.
		*/
		template< typename Type1, typename Type2 >
		__cuda_callable__
		__cuda_callable__ inline
		Type1 min( const Type1& a, const Type2& b )
		{
		return a < b ? a : b;
		};

		// specialization for int
		template< class T >
		__cuda_callable__ inline
		typename enable_if_same_base< T, int >::type
		min( const T& a, const T& b )
		{
		#ifdef __CUDA_ARCH__
		return ::min( a, b );
		#else
		return std::min( a, b );
		#endif
		}

		// specialization for float
		template< class T >
		__cuda_callable__ inline
		typename enable_if_same_base< T, float >::type
		min( const T& a, const T& b )
		{
		#ifdef __CUDA_ARCH__
		return ::fminf( a, b );
		#else
		return std::fmin( a, b );
		#endif
		}

		// specialization for double
		template< class T >
		__cuda_callable__ inline
		typename enable_if_same_base< T, double >::type
		min( const T& a, const T& b )
		{
		#ifdef __CUDA_ARCH__
		return ::fmin( a, b );
		#else
		return std::fmin( a, b );
		#endif
		}


		/***
		* This function returns maximum of two numbers.
		* Specializations use the functions defined in the CUDA's math_functions.h
		* in CUDA device code and STL functions otherwise.
		*/
		template< typename Type1, typename Type2 >
		__cuda_callable__
		Type1 max( const Type1& a, const Type2& b )
		@@ -30,49 +88,103 @@ Type1 max( const Type1& a, const Type2& b )
		return a > b ? a : b;
		};

		template< typename Type >
		__cuda_callable__
		void swap( Type& a, Type& b )
		// specialization for int
		template< class T >
		__cuda_callable__ inline
		typename enable_if_same_base< T, int >::type
		max( const T& a, const T& b )
		{
		Type tmp( a );
		a = b;
		b = tmp;
		};
		#ifdef __CUDA_ARCH__
		return ::max( a, b );
		#else
		return std::max( a, b );
		#endif
		}

		// specialization for float
		template< class T >
		__cuda_callable__
		T sign( const T& a )
		__cuda_callable__ inline
		typename enable_if_same_base< T, float >::type
		max( const T& a, const T& b )
		{
		if( a < ( T ) 0 ) return -1;
		if( a == ( T ) 0 ) return 0;
		return 1;
		};
		#ifdef __CUDA_ARCH__
		return ::fmaxf( a, b );
		#else
		return std::fmax( a, b );
		#endif
		}

		// specialization for double
		template< class T >
		__cuda_callable__
		T abs( const T& n )
		__cuda_callable__ inline
		typename enable_if_same_base< T, double >::type
		max( const T& a, const T& b )
		{
		#ifdef __CUDA_ARCH__
		return ::fmax( a, b );
		#else
		return std::fmax( a, b );
		#endif
		}


		/***
		* This function returns absolute value of given number.
		* Specializations use the functions defined in the CUDA's math_functions.h
		* in CUDA device code and STL functions otherwise.
		*/
		template< class T >
		__cuda_callable__ inline
		typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type
		abs( const T& n )
		{
		if( n < ( T ) 0 )
		return -n;
		return n;
		};
		}

		__cuda_callable__
		inline int abs( const int& n )
		// specialization for any arithmetic type (e.g. int, float, double)
		template< class T >
		__cuda_callable__ inline
		typename std::enable_if< std::is_arithmetic< T >::value, T >::type
		abs( const T& n )
		{
		#ifdef __CUDA_ARCH__
		return ::abs( n );
		};
		#else
		return std::abs( n );
		#endif
		}


		template< class T >
		__cuda_callable__ inline
		T pow( const T& base, const T& exp )
		{
		#ifdef __CUDA_ARCH__
		return ::pow( base, exp );
		#else
		return std::pow( base, exp );
		#endif
		}


		template< typename Type >
		__cuda_callable__
		inline float abs( const float& f )
		void swap( Type& a, Type& b )
		{
		return ::fabs( f );
		Type tmp( a );
		a = b;
		b = tmp;
		};

		template< class T >
		__cuda_callable__
		inline double abs( const double& d )
		T sign( const T& a )
		{
		return ::fabs( d );
		if( a < ( T ) 0 ) return -1;
		if( a == ( T ) 0 ) return 0;
		return 1;
		};

		template< typename Real >