Commit 664c84c0 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Fixed math functions in reduction-operations.h

parent 9d816749
Loading
Loading
Loading
Loading
+17 −187
Original line number Diff line number Diff line
@@ -10,186 +10,16 @@

#pragma once 

#include <cmath>
#include <TNL/Constants.h>
#include <TNL/Math.h>
#include <TNL/Devices/Cuda.h>

#ifdef HAVE_CUDA
#include <cuda.h>
#include <TNL/Math.h>
#endif


namespace TNL {
namespace Containers {
namespace Algorithms {

#ifdef HAVE_CUDA
/***
 * This function returns minimum of two numbers stored on the device.
 * TODO: Make it tnlMin, tnlMax etc.
 */
template< class T > __device__ T tnlCudaMin( const T& a,
                                             const T& b )
{
   return a < b ? a : b;
}

__device__ inline int tnlCudaMin( const int& a,
                                  const int& b )
{
   return min( a, b );
}

__device__ inline  float tnlCudaMin( const float& a,
                                     const float& b )
{
   return fminf( a, b );
}

__device__ inline  double tnlCudaMin( const double& a,
                                      const double& b )
{
   return fmin( a, b );
}

template< class T > __device__ T tnlCudaMin( volatile const T& a,
                                             volatile const T& b )
{
   return a < b ? a : b;
}

__device__ inline int tnlCudaMin( volatile const int& a,
                                  volatile const int& b )
{
   return min( a, b );
}

__device__ inline  float tnlCudaMin( volatile const float& a,
                                     volatile const float& b )
{
   return fminf( a, b );
}

__device__ inline  double tnlCudaMin( volatile const double& a,
                                      volatile const double& b )
{
   return fmin( a, b );
}


/***
 * This function returns maximum of two numbers stored on the device.
 */
template< class T > __device__ T tnlCudaMax( const T& a,
                                             const T& b )
{
   return a > b ? a : b;
}

__device__  inline int tnlCudaMax( const int& a,
                                   const int& b )
{
   return max( a, b );
}

__device__  inline float tnlCudaMax( const float& a,
                                     const float& b )
{
   return fmaxf( a, b );
}

__device__  inline double tnlCudaMax( const double& a,
                                      const double& b )
{
   return fmax( a, b );
}

template< class T > __device__ T tnlCudaMax( volatile const T& a,
                                             volatile const T& b )
{
   return a > b ? a : b;
}

__device__  inline int tnlCudaMax( volatile const int& a,
                                   volatile const int& b )
{
   return max( a, b );
}

__device__  inline float tnlCudaMax( volatile const float& a,
                                     volatile const float& b )
{
   return fmaxf( a, b );
}

__device__  inline double tnlCudaMax( volatile const double& a,
                                      volatile const double& b )
{
   return fmax( a, b );
}

/***
 * This function returns absolute value of given number on the device.
 */
__device__  inline int tnlCudaAbs( const int& a )
{
   return ::abs( a );
}

__device__  inline long int tnlCudaAbs( const long int& a )
{
   return ::abs( a );
}

__device__  inline float tnlCudaAbs( const float& a )
{
   return ::abs( a );
}

__device__  inline double tnlCudaAbs( const double& a )
{
   return ::abs( a );
}

__device__  inline long double tnlCudaAbs( const long double& a )
{
   return ::abs( ( double ) a );
}

__device__  inline int tnlCudaAbs( volatile const int& a )
{
   return ::abs( a );
}

__device__  inline long int tnlCudaAbs( volatile const long int& a )
{
   return ::abs( a );
}

__device__  inline float tnlCudaAbs( volatile const float& a )
{
   return ::abs( a );
}

__device__  inline double tnlCudaAbs( volatile const double& a )
{
   return ::abs( a );
}

__device__  inline long double tnlCudaAbs( volatile const long double& a )
{
   return ::abs( ( double ) a );
}


template< typename Type1, typename Type2 >
__device__ Type1 tnlCudaPow( const Type1& x, const Type2& power )
{
   return ( Type1 ) ::pow( ( double ) x, ( double ) power );
}
#endif

template< typename Real, typename Index >
class tnlParallelReductionSum
{
@@ -256,19 +86,19 @@ class tnlParallelReductionMin
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMin( result, data1[ index ] );
      result = TNL::min( result, data1[ index ] );
   }
 
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      result = tnlCudaMin( result, data );
      result = TNL::min( result, data );
   };
 
   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      result = tnlCudaMin( result, data );
      result = TNL::min( result, data );
   };
};

@@ -297,19 +127,19 @@ class tnlParallelReductionMax
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMax( result, data1[ index ] );
      result = TNL::max( result, data1[ index ] );
   }
 
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      result = tnlCudaMax( result, data );
      result = TNL::max( result, data );
   };

   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      result = tnlCudaMax( result, data );
      result = TNL::max( result, data );
   };
};

@@ -421,7 +251,7 @@ class tnlParallelReductionAbsSum : public tnlParallelReductionSum< Real, Index >
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result += tnlCudaAbs( data1[ index ] );
      result += TNL::abs( data1[ index ] );
   }
};

@@ -450,7 +280,7 @@ class tnlParallelReductionAbsMin : public tnlParallelReductionMin< Real, Index >
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMin( result, tnlCudaAbs( data1[ index ] ) );
      result = TNL::min( result, TNL::abs( data1[ index ] ) );
   }
};

@@ -479,7 +309,7 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index >
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMax( result, tnlCudaAbs( data1[ index ] ) );
      result = TNL::max( result, TNL::abs( data1[ index ] ) );
   }
};

@@ -545,7 +375,7 @@ class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index >
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result += tnlCudaPow( tnlCudaAbs( data1[ index ] ), p );
      result += TNL::pow( TNL::abs( data1[ index ] ), p );
   }
 
   protected:
@@ -694,7 +524,7 @@ class tnlParallelReductionDiffMin : public tnlParallelReductionMin< Real, Index
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMin( result, data1[ index ] - data2[ index ] );
      result = TNL::min( result, data1[ index ] - data2[ index ] );
   }
};

@@ -723,7 +553,7 @@ class tnlParallelReductionDiffMax : public tnlParallelReductionMax< Real, Index
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMax( result, data1[ index ] - data2[ index ] );
      result = TNL::max( result, data1[ index ] - data2[ index ] );
   }
};

@@ -752,7 +582,7 @@ class tnlParallelReductionDiffAbsSum : public tnlParallelReductionMax< Real, Ind
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result += tnlCudaAbs( data1[ index ] - data2[ index ] );
      result += TNL::abs( data1[ index ] - data2[ index ] );
   }
};

@@ -781,7 +611,7 @@ class tnlParallelReductionDiffAbsMin : public tnlParallelReductionMin< Real, Ind
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMin( result, tnlCudaAbs( data1[ index ] - data2[ index ] ) );
      result = TNL::min( result, TNL::abs( data1[ index ] - data2[ index ] ) );
   }
};

@@ -810,7 +640,7 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result = tnlCudaMax( result, tnlCudaAbs( data1[ index ] - data2[ index ] ) );
      result = TNL::max( result, TNL::abs( data1[ index ] - data2[ index ] ) );
   }
};

@@ -879,7 +709,7 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind
                                              const RealType* data1,
                                              const RealType* data2 )
   {
      result += tnlCudaPow( tnlCudaAbs( data1[ index ] - data2[ index ] ), p );
      result += TNL::pow( TNL::abs( data1[ index ] - data2[ index ] ), p );
   }
 
   protected:
+137 −25
Original line number Diff line number Diff line
@@ -11,18 +11,76 @@
#pragma once 

#include <cmath>
#include <stdlib.h>
#include <type_traits>

#include <TNL/Devices/Cuda.h>

#ifdef HAVE_CUDA
#include <cuda.h>
#endif

namespace TNL {

template< typename T1, typename T2 >
using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >;

/***
 * This function returns minimum of two numbers.
 * Specializations use the functions defined in the CUDA's math_functions.h
 * in CUDA device code and STL functions otherwise.
 */
template< typename Type1, typename Type2 >
__cuda_callable__
__cuda_callable__ inline
Type1 min( const Type1& a, const Type2& b )
{
   return a < b ? a : b;
};

// specialization for int
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, int >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::min( a, b );
#else
   return std::min( a, b );
#endif
}

// specialization for float
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, float >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fminf( a, b );
#else
   return std::fmin( a, b );
#endif
}

// specialization for double
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, double >::type
min( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fmin( a, b );
#else
   return std::fmin( a, b );
#endif
}


/***
 * This function returns maximum of two numbers.
 * Specializations use the functions defined in the CUDA's math_functions.h
 * in CUDA device code and STL functions otherwise.
 */
template< typename Type1, typename Type2 >
__cuda_callable__
Type1 max( const Type1& a, const Type2& b )
@@ -30,49 +88,103 @@ Type1 max( const Type1& a, const Type2& b )
   return a > b ? a : b;
};

template< typename Type >
__cuda_callable__
void swap( Type& a, Type& b )
// specialization for int
template< class T >
__cuda_callable__ inline
typename enable_if_same_base< T, int >::type
max( const T& a, const T& b )
{
   Type tmp( a );
   a = b;
   b = tmp;
};
#ifdef __CUDA_ARCH__
   return ::max( a, b );
#else
   return std::max( a, b );
#endif
}

// specialization for float
template< class T >
__cuda_callable__
T sign( const T& a )
__cuda_callable__ inline
typename enable_if_same_base< T, float >::type
max( const T& a, const T& b )
{
   if( a < ( T ) 0 ) return -1;
   if( a == ( T ) 0 ) return 0;
   return 1;
};
#ifdef __CUDA_ARCH__
   return ::fmaxf( a, b );
#else
   return std::fmax( a, b );
#endif
}

// specialization for double
template< class T >
__cuda_callable__
T abs( const T& n )
__cuda_callable__ inline
typename enable_if_same_base< T, double >::type
max( const T& a, const T& b )
{
#ifdef __CUDA_ARCH__
   return ::fmax( a, b );
#else
   return std::fmax( a, b );
#endif
}


/***
 * This function returns absolute value of given number.
 * Specializations use the functions defined in the CUDA's math_functions.h
 * in CUDA device code and STL functions otherwise.
 */
template< class T >
__cuda_callable__ inline
typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type
abs( const T& n )
{
   if( n < ( T ) 0 )
      return -n;
   return n;
};
}

__cuda_callable__
inline int abs( const int& n )
// specialization for any arithmetic type (e.g. int, float, double)
template< class T >
__cuda_callable__ inline
typename std::enable_if< std::is_arithmetic< T >::value, T >::type
abs( const T& n )
{
#ifdef __CUDA_ARCH__
   return ::abs( n );
};
#else
   return std::abs( n );
#endif
}


template< class T >
__cuda_callable__ inline
T pow( const T& base, const T& exp )
{
#ifdef __CUDA_ARCH__
   return ::pow( base, exp );
#else
   return std::pow( base, exp );
#endif
}


template< typename Type >
__cuda_callable__
inline float abs( const float& f )
void swap( Type& a, Type& b )
{
   return ::fabs( f );
   Type tmp( a );
   a = b;
   b = tmp;
};

template< class T >
__cuda_callable__
inline double abs( const double& d )
T sign( const T& a )
{
   return ::fabs( d );
   if( a < ( T ) 0 ) return -1;
   if( a == ( T ) 0 ) return 0;
   return 1;
};

template< typename Real >