Fixing the CUDA parallel reduction. (2bfc2596) · Commits · TNL / tnl-dev

src/core/cuda/cuda-reduction_impl.h

+5 −4

Original line number	Diff line number	Diff line
		@@ -74,6 +74,7 @@ typename Operation::IndexType reduceOnCudaDevice( const Operation& operation,
		dim3 blockSize( 256 ), gridSize( 0 );
		gridSize.x = Min( tnlCuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );

		//tnlCudaReductionBuffer cudaReductionBuffer( 8 * minGPUReductionDataSize );
		if( ! cudaReductionBuffer.setSize( gridSize.x * sizeof( ResultType ) ) )
		return false;
		output = cudaReductionBuffer.template getData< ResultType >();

src/core/cuda/tnlCudaReductionBuffer.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -33,6 +33,7 @@ class tnlCudaReductionBuffer
		#ifdef HAVE_CUDA
		if( size > this->size )
		{
		if( data ) cudaFree( data );
		this->size = size;
		if( cudaMalloc( ( void** ) &this->data, size ) != cudaSuccess )
		{

src/core/mfuncs.h

+2 −9

Original line number	Diff line number	Diff line
		@@ -79,12 +79,14 @@ inline double tnlAbs( const double& d )
		};

		// Tells whether v lies in the closed interval [ -tolerance, tolerance ].
		// Both bounds are inclusive. When no tolerance is passed, the literal
		// 1.0e-5 converted to Real is used.
		template< typename Real >
		__cuda_callable__
		bool isSmall( const Real& v,
		              const Real& tolerance = 1.0e-5 )
		{
		   // Anything that does not satisfy the lower bound is rejected right away.
		   if( ! ( -tolerance <= v ) )
		      return false;
		   // The lower bound holds, so the answer is decided by the upper bound alone.
		   return ( v <= tolerance );
		}

		__cuda_callable__
		inline int roundUpDivision( const int num, const int div )
		{
		return num / div + ( num % div != 0 );
		@@ -108,14 +110,5 @@ inline bool isPow2( long int x )
		// The bitwise AND must be parenthesized: `==` binds tighter than `&`, so
		// the unparenthesized form evaluated `x & ( ( x - 1 ) == 0 )` and was true
		// only for x == 1. Note that x == 0 also satisfies this test.
		return ( ( x & ( x - 1 ) ) == 0 );
		}

		/*template< typename T >
		void swap( T& a, T& b)
		{
		T aux;
		aux = a;
		a = b;
		b = aux;
		}*/


		#endif

src/core/tnlConstants.h

+27 −26

Original line number	Diff line number	Diff line
		@@ -22,32 +22,33 @@
		#include <float.h>
		#include <cstdio>
		#include <core/tnlAssert.h>

		// tnlMinValue< T >() returns the lowest value of the type T, taken from the
		// <limits.h> / <float.h> macros. Floating point types return the negated
		// *_MAX constant (the most negative finite value), not the *_MIN constant.
		//
		// Generic fallback for types without a specialization: it trips the
		// assertion and then returns a value-initialized T, so that control never
		// flows off the end of a non-void function when tnlAssert does not stop
		// execution (presumably the case in non-debug builds - confirm against
		// core/tnlAssert.h).
		template< typename T > T tnlMinValue()
		{
		   tnlAssert( false, );
		   return T();
		}
		template<> inline char               tnlMinValue< char >()               { return CHAR_MIN; }
		template<> inline unsigned char      tnlMinValue< unsigned char >()      { return 0; }
		template<> inline short int          tnlMinValue< short int >()          { return SHRT_MIN; }
		template<> inline unsigned short int tnlMinValue< unsigned short int >() { return 0; }
		template<> inline int                tnlMinValue< int >()                { return INT_MIN; }
		template<> inline unsigned int       tnlMinValue< unsigned int >()       { return 0; }
		template<> inline long int           tnlMinValue< long int >()           { return LONG_MIN; }
		template<> inline unsigned long int  tnlMinValue< unsigned long int >()  { return 0; }
		template<> inline float              tnlMinValue< float >()              { return -FLT_MAX; }
		template<> inline double             tnlMinValue< double >()             { return -DBL_MAX; }
		template<> inline long double        tnlMinValue< long double >()        { return -LDBL_MAX; }

		// tnlMaxValue< T >() returns the largest value of the type T, taken from the
		// <limits.h> / <float.h> macros.
		//
		// Generic fallback for types without a specialization: it trips the
		// assertion and then returns a value-initialized T, so that control never
		// flows off the end of a non-void function when tnlAssert does not stop
		// execution (presumably the case in non-debug builds - confirm against
		// core/tnlAssert.h).
		template< typename T > T tnlMaxValue()
		{
		   tnlAssert( false, );
		   return T();
		}
		template<> inline char               tnlMaxValue< char >()               { return CHAR_MAX; }
		template<> inline unsigned char      tnlMaxValue< unsigned char >()      { return UCHAR_MAX; }
		template<> inline short int          tnlMaxValue< short int >()          { return SHRT_MAX; }
		template<> inline unsigned short int tnlMaxValue< unsigned short int >() { return USHRT_MAX; }
		template<> inline int                tnlMaxValue< int >()                { return INT_MAX; }
		template<> inline unsigned int       tnlMaxValue< unsigned int >()       { return UINT_MAX; }
		template<> inline long int           tnlMaxValue< long int >()           { return LONG_MAX; }
		template<> inline unsigned long int  tnlMaxValue< unsigned long int >()  { return ULONG_MAX; }
		template<> inline float              tnlMaxValue< float >()              { return FLT_MAX; }
		template<> inline double             tnlMaxValue< double >()             { return DBL_MAX; }
		template<> inline long double        tnlMaxValue< long double >()        { return LDBL_MAX; }
		#include <core/tnlCuda.h>

		// tnlMinValue< T >() returns the lowest value of the type T, taken from the
		// <limits.h> / <float.h> macros. Floating point types return the negated
		// *_MAX constant (the most negative finite value), not the *_MIN constant.
		// Every overload is marked __cuda_callable__.
		//
		// Generic fallback for types without a specialization: it trips the
		// assertion and then returns a value-initialized T, so that control never
		// flows off the end of a non-void function when tnlAssert does not stop
		// execution (presumably the case in non-debug builds - confirm against
		// core/tnlAssert.h).
		template< typename T > __cuda_callable__ T tnlMinValue()
		{
		   tnlAssert( false, );
		   return T();
		}
		template<> inline __cuda_callable__ char               tnlMinValue< char >()               { return CHAR_MIN; }
		template<> inline __cuda_callable__ unsigned char      tnlMinValue< unsigned char >()      { return 0; }
		template<> inline __cuda_callable__ short int          tnlMinValue< short int >()          { return SHRT_MIN; }
		template<> inline __cuda_callable__ unsigned short int tnlMinValue< unsigned short int >() { return 0; }
		template<> inline __cuda_callable__ int                tnlMinValue< int >()                { return INT_MIN; }
		template<> inline __cuda_callable__ unsigned int       tnlMinValue< unsigned int >()       { return 0; }
		template<> inline __cuda_callable__ long int           tnlMinValue< long int >()           { return LONG_MIN; }
		template<> inline __cuda_callable__ unsigned long int  tnlMinValue< unsigned long int >()  { return 0; }
		template<> inline __cuda_callable__ float              tnlMinValue< float >()              { return -FLT_MAX; }
		template<> inline __cuda_callable__ double             tnlMinValue< double >()             { return -DBL_MAX; }
		template<> inline __cuda_callable__ long double        tnlMinValue< long double >()        { return -LDBL_MAX; }

		// tnlMaxValue< T >() returns the largest value of the type T, taken from the
		// <limits.h> / <float.h> macros. Every overload is marked __cuda_callable__.
		//
		// Generic fallback for types without a specialization: it trips the
		// assertion and then returns a value-initialized T, so that control never
		// flows off the end of a non-void function when tnlAssert does not stop
		// execution (presumably the case in non-debug builds - confirm against
		// core/tnlAssert.h).
		template< typename T > __cuda_callable__ T tnlMaxValue()
		{
		   tnlAssert( false, );
		   return T();
		}
		template<> inline __cuda_callable__ char               tnlMaxValue< char >()               { return CHAR_MAX; }
		template<> inline __cuda_callable__ unsigned char      tnlMaxValue< unsigned char >()      { return UCHAR_MAX; }
		template<> inline __cuda_callable__ short int          tnlMaxValue< short int >()          { return SHRT_MAX; }
		template<> inline __cuda_callable__ unsigned short int tnlMaxValue< unsigned short int >() { return USHRT_MAX; }
		template<> inline __cuda_callable__ int                tnlMaxValue< int >()                { return INT_MAX; }
		template<> inline __cuda_callable__ unsigned int       tnlMaxValue< unsigned int >()       { return UINT_MAX; }
		template<> inline __cuda_callable__ long int           tnlMaxValue< long int >()           { return LONG_MAX; }
		template<> inline __cuda_callable__ unsigned long int  tnlMaxValue< unsigned long int >()  { return ULONG_MAX; }
		template<> inline __cuda_callable__ float              tnlMaxValue< float >()              { return FLT_MAX; }
		template<> inline __cuda_callable__ double             tnlMaxValue< double >()             { return DBL_MAX; }
		template<> inline __cuda_callable__ long double        tnlMaxValue< long double >()        { return LDBL_MAX; }

src/core/cuda/tnlCudaReduction_impl.h

+2 −2

File changed.

Contains only whitespace changes.