Loading src/TNL/Math.h +51 −100 Original line number Diff line number Diff line Loading @@ -21,147 +21,99 @@ namespace TNL { template< typename T1, typename T2 > using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >; template< typename T1, typename T2 > using both_integral_or_floating = typename std::conditional< ( std::is_integral< T1 >::value && std::is_integral< T2 >::value ) || ( std::is_floating_point< T1 >::value && std::is_floating_point< T2 >::value ), std::true_type, std::false_type >::type; // 1. If both types are integral or floating-point, the larger type is selected. // 2. If one type is integral and the other floating-point, the floating-point type is selected. // This is necessary only due to the limitations of nvcc. Note that clang and gcc // can handle automatic promotion using a single-type template, exactly like // std::min and std::max are implemented in STL. template< typename T1, typename T2 > using larger_type = typename std::conditional< ( both_integral_or_floating< T1, T2 >::value && sizeof(T1) >= sizeof(T2) ) || std::is_floating_point<T1>::value, T1, T2 >::type; /*** * This function returns minimum of two numbers. * Specializations use the functions defined in the CUDA's math_functions.h * in CUDA device code and STL functions otherwise. * GPU device code uses the functions defined in the CUDA's math_functions.h, * MIC uses trivial override and host uses the STL functions. */ template< typename Type1, typename Type2 > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > > __cuda_callable__ inline Type1 min( const Type1& a, const Type2& b ) ResultType min( const T1& a, const T2& b ) { #if defined(__CUDA_ARCH__) return ::min( (ResultType) a, (ResultType) b ); #elif defined(__MIC__) return a < b ? a : b; }; // specialization for int template< class T > __cuda_callable__ inline typename enable_if_same_base< T, int >::type min( const T& a, const T& b ) { #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::min( a, b ); #else return std::min( a, b ); #endif } // specialization for float template< class T > __cuda_callable__ inline typename enable_if_same_base< T, float >::type min( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fminf( a, b ); #else return std::fmin( a, b ); #endif } // specialization for double template< class T > __cuda_callable__ inline typename enable_if_same_base< T, double >::type min( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fmin( a, b ); #else return std::fmin( a, b ); return std::min( (ResultType) a, (ResultType) b ); #endif } /*** * This function returns maximum of two numbers. * Specializations use the functions defined in the CUDA's math_functions.h * in CUDA device code and STL functions otherwise. * GPU device code uses the functions defined in the CUDA's math_functions.h, * MIC uses trivial override and host uses the STL functions. */ template< typename Type1, typename Type2 > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > > __cuda_callable__ Type1 max( const Type1& a, const Type2& b ) ResultType max( const T1& a, const T2& b ) { #if defined(__CUDA_ARCH__) return ::max( (ResultType) a, (ResultType) b ); #elif defined(__MIC__) return a > b ? a : b; }; // specialization for int template< class T > __cuda_callable__ inline typename enable_if_same_base< T, int >::type max( const T& a, const T& b ) { #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::max( a, b ); #else return std::max( a, b ); #endif } // specialization for float template< class T > __cuda_callable__ inline typename enable_if_same_base< T, float >::type max( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fmaxf( a, b ); #else return std::fmax( a, b ); #endif } // specialization for double template< class T > __cuda_callable__ inline typename enable_if_same_base< T, double >::type max( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fmax( a, b ); #else return std::fmax( a, b ); return std::max( (ResultType) a, (ResultType) b ); #endif } /*** * This function returns absolute value of given number. * Specializations use the functions defined in the CUDA's math_functions.h * in CUDA device code and STL functions otherwise. */ template< class T > __cuda_callable__ inline typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type abs( const T& n ) T abs( const T& n ) { #if defined(__MIC__) if( n < ( T ) 0 ) return -n; return n; #else return std::abs( n ); #endif } // specialization for any arithmetic type (e.g. int, float, double) template< class T > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > > __cuda_callable__ inline typename std::enable_if< std::is_arithmetic< T >::value, T >::type abs( const T& n ) ResultType pow( const T1& base, const T2& exp ) { #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::abs( n ); return ::pow( (ResultType) base, (ResultType) exp ); #else return std::abs( n ); return std::pow( (ResultType) base, (ResultType) exp ); #endif } template< class T > template< typename T > __cuda_callable__ inline T pow( const T& base, const T& exp ) T sqrt( const T& value ) { #ifdef __CUDA_ARCH__ return ::pow( base, exp ); #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::sqrt( value ); #else return std::pow( base, exp ); return std::sqrt( value ); #endif } Loading @@ -173,7 +125,7 @@ void swap( Type& a, Type& b ) Type tmp( a ); a = b; b = tmp; }; } template< class T > __cuda_callable__ Loading @@ -182,7 +134,7 @@ T sign( const T& a ) if( a < ( T ) 0 ) return ( T ) -1; if( a == ( T ) 0 ) return ( T ) 0; return ( T ) 1; }; } template< typename Real > __cuda_callable__ Loading Loading @@ -217,4 +169,3 @@ inline bool isPow2( long int x ) } } // namespace TNL Loading
src/TNL/Math.h +51 −100 Original line number Diff line number Diff line Loading @@ -21,147 +21,99 @@ namespace TNL { template< typename T1, typename T2 > using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >; template< typename T1, typename T2 > using both_integral_or_floating = typename std::conditional< ( std::is_integral< T1 >::value && std::is_integral< T2 >::value ) || ( std::is_floating_point< T1 >::value && std::is_floating_point< T2 >::value ), std::true_type, std::false_type >::type; // 1. If both types are integral or floating-point, the larger type is selected. // 2. If one type is integral and the other floating-point, the floating-point type is selected. // This is necessary only due to the limitations of nvcc. Note that clang and gcc // can handle automatic promotion using a single-type template, exactly like // std::min and std::max are implemented in STL. template< typename T1, typename T2 > using larger_type = typename std::conditional< ( both_integral_or_floating< T1, T2 >::value && sizeof(T1) >= sizeof(T2) ) || std::is_floating_point<T1>::value, T1, T2 >::type; /*** * This function returns minimum of two numbers. * Specializations use the functions defined in the CUDA's math_functions.h * in CUDA device code and STL functions otherwise. * GPU device code uses the functions defined in the CUDA's math_functions.h, * MIC uses trivial override and host uses the STL functions. */ template< typename Type1, typename Type2 > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > > __cuda_callable__ inline Type1 min( const Type1& a, const Type2& b ) ResultType min( const T1& a, const T2& b ) { #if defined(__CUDA_ARCH__) return ::min( (ResultType) a, (ResultType) b ); #elif defined(__MIC__) return a < b ? a : b; }; // specialization for int template< class T > __cuda_callable__ inline typename enable_if_same_base< T, int >::type min( const T& a, const T& b ) { #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::min( a, b ); #else return std::min( a, b ); #endif } // specialization for float template< class T > __cuda_callable__ inline typename enable_if_same_base< T, float >::type min( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fminf( a, b ); #else return std::fmin( a, b ); #endif } // specialization for double template< class T > __cuda_callable__ inline typename enable_if_same_base< T, double >::type min( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fmin( a, b ); #else return std::fmin( a, b ); return std::min( (ResultType) a, (ResultType) b ); #endif } /*** * This function returns maximum of two numbers. * Specializations use the functions defined in the CUDA's math_functions.h * in CUDA device code and STL functions otherwise. * GPU device code uses the functions defined in the CUDA's math_functions.h, * MIC uses trivial override and host uses the STL functions. */ template< typename Type1, typename Type2 > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > > __cuda_callable__ Type1 max( const Type1& a, const Type2& b ) ResultType max( const T1& a, const T2& b ) { #if defined(__CUDA_ARCH__) return ::max( (ResultType) a, (ResultType) b ); #elif defined(__MIC__) return a > b ? a : b; }; // specialization for int template< class T > __cuda_callable__ inline typename enable_if_same_base< T, int >::type max( const T& a, const T& b ) { #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::max( a, b ); #else return std::max( a, b ); #endif } // specialization for float template< class T > __cuda_callable__ inline typename enable_if_same_base< T, float >::type max( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fmaxf( a, b ); #else return std::fmax( a, b ); #endif } // specialization for double template< class T > __cuda_callable__ inline typename enable_if_same_base< T, double >::type max( const T& a, const T& b ) { #ifdef __CUDA_ARCH__ return ::fmax( a, b ); #else return std::fmax( a, b ); return std::max( (ResultType) a, (ResultType) b ); #endif } /*** * This function returns absolute value of given number. * Specializations use the functions defined in the CUDA's math_functions.h * in CUDA device code and STL functions otherwise. */ template< class T > __cuda_callable__ inline typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type abs( const T& n ) T abs( const T& n ) { #if defined(__MIC__) if( n < ( T ) 0 ) return -n; return n; #else return std::abs( n ); #endif } // specialization for any arithmetic type (e.g. int, float, double) template< class T > template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > > __cuda_callable__ inline typename std::enable_if< std::is_arithmetic< T >::value, T >::type abs( const T& n ) ResultType pow( const T1& base, const T2& exp ) { #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::abs( n ); return ::pow( (ResultType) base, (ResultType) exp ); #else return std::abs( n ); return std::pow( (ResultType) base, (ResultType) exp ); #endif } template< class T > template< typename T > __cuda_callable__ inline T pow( const T& base, const T& exp ) T sqrt( const T& value ) { #ifdef __CUDA_ARCH__ return ::pow( base, exp ); #if defined(__CUDA_ARCH__) || defined(__MIC__) return ::sqrt( value ); #else return std::pow( base, exp ); return std::sqrt( value ); #endif } Loading @@ -173,7 +125,7 @@ void swap( Type& a, Type& b ) Type tmp( a ); a = b; b = tmp; }; } template< class T > __cuda_callable__ Loading @@ -182,7 +134,7 @@ T sign( const T& a ) if( a < ( T ) 0 ) return ( T ) -1; if( a == ( T ) 0 ) return ( T ) 0; return ( T ) 1; }; } template< typename Real > __cuda_callable__ Loading Loading @@ -217,4 +169,3 @@ inline bool isPow2( long int x ) } } // namespace TNL