Commit 2bfc2596 authored by Tomáš Oberhuber
Browse files

Fixing the CUDA parallel reduction.

parent b274a488
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ typename Operation::IndexType reduceOnCudaDevice( const Operation& operation,
   dim3 blockSize( 256 ), gridSize( 0 );   
   gridSize.x = Min( tnlCuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
   
   //tnlCudaReductionBuffer cudaReductionBuffer( 8 * minGPUReductionDataSize );
   if( ! cudaReductionBuffer.setSize( gridSize.x * sizeof( ResultType ) ) )
      return false;
   output = cudaReductionBuffer.template getData< ResultType >();      
+1 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ class tnlCudaReductionBuffer
#ifdef HAVE_CUDA         
         if( size > this->size )
         {
            if( data ) cudaFree( data );
            this->size = size;
            if( cudaMalloc( ( void** ) &this->data, size ) != cudaSuccess )
            {
+2 −9
Original line number Diff line number Diff line
@@ -79,12 +79,14 @@ inline double tnlAbs( const double& d )
};

/**
 * Tells whether v lies inside the closed interval [ -tolerance, tolerance ].
 *
 * @param v          value to test
 * @param tolerance  half-width of the accepted interval; the default 1.0e-5
 *                   is converted to Real
 * @return true when -tolerance <= v and v <= tolerance, false otherwise
 */
template< typename Real >
__cuda_callable__
bool isSmall( const Real& v,
              const Real& tolerance = 1.0e-5 )
{
   // Check the lower bound first; only when it holds is the upper bound tested.
   if( ! ( -tolerance <= v ) )
      return false;
   return ( v <= tolerance ) ? true : false;
}

// Integer division of num by div that adds one whenever the remainder is
// non-zero. For non-negative num and positive div this equals the quotient
// rounded up. div must not be zero (both / and % are undefined for it).
__cuda_callable__
inline int roundUpDivision( const int num, const int div )
{
   return num / div + ( num % div != 0 );
@@ -108,14 +110,5 @@ inline bool isPow2( long int x )
   // The bit test must be parenthesized: `==` binds tighter than `&`, so the
   // unparenthesized form evaluated x & ( ( x - 1 ) == 0 ), which is non-zero
   // only for x == 1. Zero and negative values are not powers of two; testing
   // x > 0 first also avoids computing x - 1 for the most negative value.
   return ( x > 0 && ( x & ( x - 1 ) ) == 0 );
}

/*template< typename T >
void swap( T& a, T& b)
{
   T aux;
   aux = a;
   a = b;
   b = aux;
}*/


#endif
+27 −26
Original line number Diff line number Diff line
@@ -22,32 +22,33 @@
#include <float.h>
#include <cstdio>
#include <core/tnlAssert.h>

// tnlMinValue< T >() returns the lowest value of the arithmetic type T:
// the <limits.h> minimum for signed integer types, 0 for unsigned integer
// types and the negated largest finite value for floating-point types.
// The primary template only invokes tnlAssert( false, ) and has no return
// statement. NOTE(review): presumably it is meant to flag use with an
// unsupported type - confirm against the tnlAssert macro in core/tnlAssert.h.
template< typename T > T tnlMinValue(){ tnlAssert( false,)};
template<> inline char tnlMinValue< char >() { return CHAR_MIN; }
template<> inline unsigned char tnlMinValue< unsigned char >() { return 0; }
template<> inline short int tnlMinValue< short int >() { return SHRT_MIN; }
template<> inline unsigned short int tnlMinValue< unsigned short int >() { return 0; }
template<> inline int tnlMinValue< int >() { return INT_MIN; }
template<> inline unsigned int tnlMinValue< unsigned int >() { return 0; }
template<> inline long int tnlMinValue< long int >() { return LONG_MIN; }
template<> inline unsigned long int tnlMinValue< unsigned long int >() { return 0; }
template<> inline float tnlMinValue< float >() { return -FLT_MAX; }
template<> inline double tnlMinValue< double >() { return -DBL_MAX; }
template<> inline long double tnlMinValue< long double >() { return -LDBL_MAX; }

// tnlMaxValue< T >() returns the largest value of the arithmetic type T,
// taken from the <limits.h> / <float.h> constants (the largest finite value
// for floating-point types).
// The primary template only invokes tnlAssert( false, ) and has no return
// statement. NOTE(review): presumably it is meant to flag use with an
// unsupported type - confirm against the tnlAssert macro in core/tnlAssert.h.
template< typename T > T tnlMaxValue(){ tnlAssert( false,)};
template<> inline char tnlMaxValue< char >() { return CHAR_MAX; }
template<> inline unsigned char tnlMaxValue< unsigned char >() { return UCHAR_MAX; }
template<> inline short int tnlMaxValue< short int >() { return SHRT_MAX; }
template<> inline unsigned short int tnlMaxValue< unsigned short int >() { return USHRT_MAX; }
template<> inline int tnlMaxValue< int >() { return INT_MAX; }
template<> inline unsigned int tnlMaxValue< unsigned int >() { return UINT_MAX; }
template<> inline long int tnlMaxValue< long int >() { return LONG_MAX; }
template<> inline unsigned long int tnlMaxValue< unsigned long int >() { return ULONG_MAX; }
template<> inline float tnlMaxValue< float >() { return FLT_MAX; }
template<> inline double tnlMaxValue< double >() { return DBL_MAX; }
template<> inline long double tnlMaxValue< long double >() { return LDBL_MAX; }
#include <core/tnlCuda.h>

// tnlMinValue< T >() returns the lowest value of the arithmetic type T:
// the <limits.h> minimum for signed integer types, 0 for unsigned integer
// types and the negated largest finite value for floating-point types.
// Every overload is marked __cuda_callable__.
// The primary template only invokes tnlAssert( false, ) and has no return
// statement. NOTE(review): presumably it is meant to flag use with an
// unsupported type - confirm against the tnlAssert macro in core/tnlAssert.h.
template< typename T > __cuda_callable__ T tnlMinValue(){ tnlAssert( false,)};
template<> inline __cuda_callable__ char               tnlMinValue< char >() { return CHAR_MIN; }
template<> inline __cuda_callable__ unsigned char      tnlMinValue< unsigned char >() { return 0; }
template<> inline __cuda_callable__ short int          tnlMinValue< short int >() { return SHRT_MIN; }
template<> inline __cuda_callable__ unsigned short int tnlMinValue< unsigned short int >() { return 0; }
template<> inline __cuda_callable__ int                tnlMinValue< int >() { return INT_MIN; }
template<> inline __cuda_callable__ unsigned int       tnlMinValue< unsigned int >() { return 0; }
template<> inline __cuda_callable__ long int           tnlMinValue< long int >() { return LONG_MIN; }
template<> inline __cuda_callable__ unsigned long int  tnlMinValue< unsigned long int >() { return 0; }
template<> inline __cuda_callable__ float              tnlMinValue< float >() { return -FLT_MAX; }
template<> inline __cuda_callable__ double             tnlMinValue< double >() { return -DBL_MAX; }
template<> inline __cuda_callable__ long double        tnlMinValue< long double >() { return -LDBL_MAX; }

// tnlMaxValue< T >() returns the largest value of the arithmetic type T,
// taken from the <limits.h> / <float.h> constants (the largest finite value
// for floating-point types). Every overload is marked __cuda_callable__.
// The primary template only invokes tnlAssert( false, ) and has no return
// statement. NOTE(review): presumably it is meant to flag use with an
// unsupported type - confirm against the tnlAssert macro in core/tnlAssert.h.
template< typename T > __cuda_callable__ T tnlMaxValue(){ tnlAssert( false,)};
template<> inline __cuda_callable__ char               tnlMaxValue< char >() { return CHAR_MAX; }
template<> inline __cuda_callable__ unsigned char      tnlMaxValue< unsigned char >() { return UCHAR_MAX; }
template<> inline __cuda_callable__ short int          tnlMaxValue< short int >() { return SHRT_MAX; }
template<> inline __cuda_callable__ unsigned short int tnlMaxValue< unsigned short int >() { return USHRT_MAX; }
template<> inline __cuda_callable__ int                tnlMaxValue< int >() { return INT_MAX; }
template<> inline __cuda_callable__ unsigned int       tnlMaxValue< unsigned int >() { return UINT_MAX; }
template<> inline __cuda_callable__ long int           tnlMaxValue< long int >() { return LONG_MAX; }
template<> inline __cuda_callable__ unsigned long int  tnlMaxValue< unsigned long int >() { return ULONG_MAX; }
template<> inline __cuda_callable__ float              tnlMaxValue< float >() { return FLT_MAX; }
template<> inline __cuda_callable__ double             tnlMaxValue< double >() { return DBL_MAX; }
template<> inline __cuda_callable__ long double        tnlMaxValue< long double >() { return LDBL_MAX; }



+2 −2

File changed.

Contains only whitespace changes.