Skip to content
Snippets Groups Projects
Commit 54784be9 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Refactoring Math.h

- Cleaned up unnecessary specializations of TNL::max and TNL::min
- Implemented result type promotion to fix compilation with nvcc
- Added TNL::sqrt, which will be necessary for MIC
parent e6acae23
No related branches found
No related tags found
No related merge requests found
...@@ -21,147 +21,99 @@ namespace TNL { ...@@ -21,147 +21,99 @@ namespace TNL {
// Enables an overload only when T1 (after decay) is exactly the type T2.
template< typename T1, typename T2 >
using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >;

// Yields std::true_type when T1 and T2 are both integral or both
// floating-point, std::false_type otherwise (i.e. for mixed categories).
template< typename T1, typename T2 >
using both_integral_or_floating = typename std::conditional<
   ( std::is_integral< T1 >::value && std::is_integral< T2 >::value ) ||
   ( std::is_floating_point< T1 >::value && std::is_floating_point< T2 >::value ),
   std::true_type,
   std::false_type >::type;

// Result type promotion for the binary functions below:
// 1. If both types are integral or both floating-point, the larger type is selected.
// 2. If one type is integral and the other floating-point, the floating-point
//    type is selected.
// This is necessary only due to the limitations of nvcc. Note that clang and gcc
// can handle automatic promotion using a single-type template, exactly like
// std::min and std::max are implemented in STL.
//
// NOTE: the floating-point clause must be restricted to the *mixed* case —
// a bare `std::is_floating_point< T1 >` would also fire when both types are
// floating-point and T2 is larger, wrongly demoting e.g.
// larger_type< float, double > to float.
template< typename T1, typename T2 >
using larger_type = typename std::conditional<
   ( both_integral_or_floating< T1, T2 >::value && sizeof(T1) >= sizeof(T2) ) ||
   ( std::is_floating_point< T1 >::value && std::is_integral< T2 >::value ),
   T1, T2 >::type;
/*** /***
* This function returns minimum of two numbers. * This function returns minimum of two numbers.
* Specializations use the functions defined in the CUDA's math_functions.h * GPU device code uses the functions defined in the CUDA's math_functions.h,
* in CUDA device code and STL functions otherwise. * MIC uses trivial override and host uses the STL functions.
*/ */
template< typename Type1, typename Type2 > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
__cuda_callable__ inline __cuda_callable__ inline
Type1 min( const Type1& a, const Type2& b ) ResultType min( const T1& a, const T2& b )
{ {
#if defined(__CUDA_ARCH__)
return ::min( (ResultType) a, (ResultType) b );
#elif defined(__MIC__)
return a < b ? a : b; return a < b ? a : b;
};
// specialization for int
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, int >::type
min( const T& a, const T& b )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
return ::min( a, b );
#else
return std::min( a, b );
#endif
}
// specialization for float
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, float >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
return ::fminf( a, b );
#else #else
return std::fmin( a, b ); return std::min( (ResultType) a, (ResultType) b );
#endif
}
// specialization for double
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, double >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
return ::fmin( a, b );
#else
return std::fmin( a, b );
#endif #endif
} }
/*** /***
* This function returns maximum of two numbers. * This function returns maximum of two numbers.
* Specializations use the functions defined in the CUDA's math_functions.h * GPU device code uses the functions defined in the CUDA's math_functions.h,
* in CUDA device code and STL functions otherwise. * MIC uses trivial override and host uses the STL functions.
*/ */
template< typename Type1, typename Type2 > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
__cuda_callable__ __cuda_callable__
Type1 max( const Type1& a, const Type2& b ) ResultType max( const T1& a, const T2& b )
{ {
#if defined(__CUDA_ARCH__)
return ::max( (ResultType) a, (ResultType) b );
#elif defined(__MIC__)
return a > b ? a : b; return a > b ? a : b;
};
// specialization for int
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, int >::type
max( const T& a, const T& b )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
return ::max( a, b );
#else
return std::max( a, b );
#endif
}
// specialization for float
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, float >::type
max( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
return ::fmaxf( a, b );
#else
return std::fmax( a, b );
#endif
}
// specialization for double
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, double >::type
max( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
return ::fmax( a, b );
#else #else
return std::fmax( a, b ); return std::max( (ResultType) a, (ResultType) b );
#endif #endif
} }
/*** /***
* This function returns absolute value of given number. * This function returns absolute value of given number.
* Specializations use the functions defined in the CUDA's math_functions.h
* in CUDA device code and STL functions otherwise.
*/ */
template< class T > template< class T >
__cuda_callable__ inline __cuda_callable__ inline
typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type T abs( const T& n )
abs( const T& n )
{ {
#if defined(__MIC__)
if( n < ( T ) 0 ) if( n < ( T ) 0 )
return -n; return -n;
return n; return n;
#else
return std::abs( n );
#endif
} }
/***
 * This function returns the power base^exp.
 * Arguments are promoted to the common ResultType (see larger_type above)
 * before calling the CUDA/MIC ::pow or the host std::pow.
 * NOTE(review): when both arguments are integral, ResultType is integral and
 * the floating-point result of pow is truncated — confirm this is intended.
 */
template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
__cuda_callable__ inline
ResultType pow( const T1& base, const T2& exp )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
   return ::pow( (ResultType) base, (ResultType) exp );
#else
   return std::pow( (ResultType) base, (ResultType) exp );
#endif
}
/***
 * This function returns the square root of the given value.
 * Added so that device (CUDA/MIC) code can call ::sqrt while host code uses
 * std::sqrt through a single name.
 * NOTE(review): for integral T the double result of sqrt is truncated back
 * to T — confirm callers expect this.
 */
template< typename T >
__cuda_callable__ inline
T sqrt( const T& value )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
   return ::sqrt( value );
#else
   return std::sqrt( value );
#endif
}
...@@ -173,7 +125,7 @@ void swap( Type& a, Type& b ) ...@@ -173,7 +125,7 @@ void swap( Type& a, Type& b )
Type tmp( a ); Type tmp( a );
a = b; a = b;
b = tmp; b = tmp;
}; }
template< class T > template< class T >
__cuda_callable__ __cuda_callable__
...@@ -182,7 +134,7 @@ T sign( const T& a ) ...@@ -182,7 +134,7 @@ T sign( const T& a )
if( a < ( T ) 0 ) return ( T ) -1; if( a < ( T ) 0 ) return ( T ) -1;
if( a == ( T ) 0 ) return ( T ) 0; if( a == ( T ) 0 ) return ( T ) 0;
return ( T ) 1; return ( T ) 1;
}; }
template< typename Real > template< typename Real >
__cuda_callable__ __cuda_callable__
...@@ -217,4 +169,3 @@ inline bool isPow2( long int x ) ...@@ -217,4 +169,3 @@ inline bool isPow2( long int x )
} }
} // namespace TNL } // namespace TNL
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment