Style changes in cuda-reduction_impl.h (10d56a85) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/cuda-reduction_impl.h

+21 −18

Original line number	Diff line number	Diff line
		@@ -15,7 +15,6 @@
		#ifdef HAVE_CUDA
		#include <cuda.h>
		#endif
		-#include <iostream>
		#include <TNL/Assert.h>
		#include <TNL/Containers/Algorithms/reduction-operations.h>
		#include <TNL/Containers/ArrayOperations.h>
		@@ -24,6 +23,7 @@
		#include <TNL/Containers/Algorithms/CudaReduction.h>

		#ifdef CUDA_REDUCTION_PROFILING
		+#include <iostream>
		#include <TNL/Timer.h>
		#endif

		@@ -42,7 +42,8 @@ const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256;
		#ifdef HAVE_CUDA

		template< typename Operation, int blockSize >
		-__global__ void CudaReductionKernel( Operation operation,
		+__global__ void
		+CudaReductionKernel( Operation operation,
		const typename Operation::IndexType size,
		const typename Operation::RealType* input1,
		const typename Operation::RealType* input2,
		@@ -53,7 +54,8 @@ __global__ void CudaReductionKernel( Operation operation,
		};

		template< typename Operation >
		-typename Operation::IndexType reduceOnCudaDevice( Operation& operation,
		+typename Operation::IndexType
		+reduceOnCudaDevice( Operation& operation,
		const typename Operation::IndexType size,
		const typename Operation::RealType* input1,
		const typename Operation::RealType* input2,
		@@ -127,7 +129,8 @@ typename Operation::IndexType reduceOnCudaDevice( Operation& operation,
		#endif

		template< typename Operation >
		-bool reductionOnCudaDevice( Operation& operation,
		+bool
		+reductionOnCudaDevice( Operation& operation,
		const typename Operation::IndexType size,
		const typename Operation::RealType* deviceInput1,
		const typename Operation::RealType* deviceInput2,
		@@ -144,10 +147,10 @@ bool reductionOnCudaDevice( Operation& operation,
		* First check if the input array(s) is/are large enough for the reduction on GPU.
		* Otherwise copy it/them to host and reduce on CPU.
		*/
		-RealType hostArray1[ minGPUReductionDataSize ];
		-RealType hostArray2[ minGPUReductionDataSize ];
		if( size <= minGPUReductionDataSize )
		{
		+RealType hostArray1[ minGPUReductionDataSize ];
		+RealType hostArray2[ minGPUReductionDataSize ];
		if( ! Containers::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( hostArray1, deviceInput1, size ) )
		return false;
		if( deviceInput2 && !