Commit 54784be9 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Refactoring Math.h

- Cleaned up unnecessary specializations of TNL::max and TNL::min
- Implemented result type promotion to fix compilation with nvcc
- Added TNL::sqrt, which will be necessary for MIC
parent e6acae23
Loading
Loading
Loading
Loading
+51 −100
Original line number Diff line number Diff line
@@ -21,147 +21,99 @@ namespace TNL {
template< typename T1, typename T2 >
using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >;

template< typename T1, typename T2 >
using both_integral_or_floating = typename std::conditional<
         ( std::is_integral< T1 >::value && std::is_integral< T2 >::value ) ||
         ( std::is_floating_point< T1 >::value && std::is_floating_point< T2 >::value ),
   std::true_type,
   std::false_type >::type;

// 1. If both types are integral or floating-point, the larger type is selected.
// 2. If one type is integral and the other floating-point, the floating-point type is selected.
// This is necessary only due to the limitations of nvcc. Note that clang and gcc
// can handle automatic promotion using a single-type template, exactly like
// std::min and std::max are implemented in STL.
template< typename T1, typename T2 >
using larger_type = typename std::conditional<
         ( both_integral_or_floating< T1, T2 >::value && sizeof(T1) >= sizeof(T2) ) ||
         std::is_floating_point<T1>::value,
   T1, T2 >::type;

/***
 * This function returns the minimum of two numbers.
 * Specializations use the functions defined in the CUDA's math_functions.h
 * in CUDA device code and STL functions otherwise.
 * GPU device code uses the functions defined in the CUDA's math_functions.h,
 * MIC uses trivial override and host uses the STL functions.
 */
template< typename Type1, typename Type2 >
template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
__cuda_callable__ inline
Type1 min( const Type1& a, const Type2& b )
ResultType min( const T1& a, const T2& b )
{
#if defined(__CUDA_ARCH__)
   return ::min( (ResultType) a, (ResultType) b );
#elif defined(__MIC__)
   return a < b ? a : b;
};

// specialization for int
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, int >::type
min( const T& a, const T& b )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
   return ::min( a, b );
#else
   return std::min( a, b );
#endif
}

// specialization for float
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, float >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fminf( a, b );
#else
   return std::fmin( a, b );
#endif
}

// specialization for double
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, double >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fmin( a, b );
#else
   return std::fmin( a, b );
   return std::min( (ResultType) a, (ResultType) b );
#endif
}


/***
 * This function returns the maximum of two numbers.
 * Specializations use the functions defined in the CUDA's math_functions.h
 * in CUDA device code and STL functions otherwise.
 * GPU device code uses the functions defined in the CUDA's math_functions.h,
 * MIC uses trivial override and host uses the STL functions.
 */
template< typename Type1, typename Type2 >
template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
__cuda_callable__
Type1 max( const Type1& a, const Type2& b )
ResultType max( const T1& a, const T2& b )
{
#if defined(__CUDA_ARCH__)
   return ::max( (ResultType) a, (ResultType) b );
#elif defined(__MIC__)
   return a > b ? a : b;
};

// specialization for int
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, int >::type
max( const T& a, const T& b )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
   return ::max( a, b );
#else
   return std::max( a, b );
#endif
}

// specialization for float
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, float >::type
max( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fmaxf( a, b );
#else
   return std::fmax( a, b );
#endif
}

// specialization for double
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, double >::type
max( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fmax( a, b );
#else
   return std::fmax( a, b );
   return std::max( (ResultType) a, (ResultType) b );
#endif
}


/***
 * This function returns the absolute value of a given number.
 * Specializations use the functions defined in the CUDA's math_functions.h
 * in CUDA device code and STL functions otherwise.
 */
template< class T >
__cuda_callable__ inline
typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type
abs( const T& n )
T abs( const T& n )
{
#if defined(__MIC__)
   if( n < ( T ) 0 )
      return -n;
   return n;
#else
   return std::abs( n );
#endif
}

// specialization for any arithmetic type (e.g. int, float, double)
template< class T >

template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
__cuda_callable__ inline
typename std::enable_if< std::is_arithmetic< T >::value, T >::type
abs( const T& n )
ResultType pow( const T1& base, const T2& exp )
{
#if defined(__CUDA_ARCH__) || defined(__MIC__)
   return ::abs( n );
   return ::pow( (ResultType) base, (ResultType) exp );
#else
   return std::abs( n );
   return std::pow( (ResultType) base, (ResultType) exp );
#endif
}


template< class T >
template< typename T >
__cuda_callable__ inline
T pow( const T& base, const T& exp )
T sqrt( const T& value )
{
#ifdef __CUDA_ARCH__
   return ::pow( base, exp );
#if defined(__CUDA_ARCH__) || defined(__MIC__)
   return ::sqrt( value );
#else
   return std::pow( base, exp );
   return std::sqrt( value );
#endif
}

@@ -173,7 +125,7 @@ void swap( Type& a, Type& b )
   Type tmp( a );
   a = b;
   b = tmp;
};
}

template< class T >
__cuda_callable__
@@ -182,7 +134,7 @@ T sign( const T& a )
   if( a < ( T ) 0 ) return ( T ) -1;
   if( a == ( T ) 0 ) return ( T ) 0;
   return ( T ) 1;
};
}

template< typename Real >
__cuda_callable__
@@ -217,4 +169,3 @@ inline bool isPow2( long int x )
}

} // namespace TNL