Commit 10d56a85 authored by Jakub Klinkovský
Browse files

Style changes in cuda-reduction_impl.h

parent 95f376b0
Loading
Loading
Loading
Loading
+21 −18
Original line number Diff line number Diff line
@@ -15,7 +15,6 @@
#ifdef HAVE_CUDA
#include <cuda.h>
#endif
#include <iostream>
#include <TNL/Assert.h>
#include <TNL/Containers/Algorithms/reduction-operations.h>
#include <TNL/Containers/ArrayOperations.h>
@@ -24,6 +23,7 @@
#include <TNL/Containers/Algorithms/CudaReduction.h>

#ifdef CUDA_REDUCTION_PROFILING
#include <iostream>
#include <TNL/Timer.h>
#endif

@@ -42,7 +42,8 @@ const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256;
#ifdef HAVE_CUDA

template< typename Operation, int blockSize >
__global__ void CudaReductionKernel( Operation operation,
__global__ void
CudaReductionKernel( Operation operation,
                     const typename Operation::IndexType size,
                     const typename Operation::RealType* input1,
                     const typename Operation::RealType* input2,
@@ -53,7 +54,8 @@ __global__ void CudaReductionKernel( Operation operation,
};

template< typename Operation >
typename Operation::IndexType reduceOnCudaDevice( Operation& operation,
typename Operation::IndexType
reduceOnCudaDevice( Operation& operation,
                    const typename Operation::IndexType size,
                    const typename Operation::RealType* input1,
                    const typename Operation::RealType* input2,
@@ -127,7 +129,8 @@ typename Operation::IndexType reduceOnCudaDevice( Operation& operation,
#endif

template< typename Operation >
bool reductionOnCudaDevice( Operation& operation,
bool
reductionOnCudaDevice( Operation& operation,
                       const typename Operation::IndexType size,
                       const typename Operation::RealType* deviceInput1,
                       const typename Operation::RealType* deviceInput2,
@@ -144,10 +147,10 @@ bool reductionOnCudaDevice( Operation& operation,
    * First check if the input array(s) is/are large enough for the reduction on GPU.
    * Otherwise copy it/them to host and reduce on CPU.
    */
   RealType hostArray1[ minGPUReductionDataSize ];
   RealType hostArray2[ minGPUReductionDataSize ];
   if( size <= minGPUReductionDataSize )
   {
      RealType hostArray1[ minGPUReductionDataSize ];
      RealType hostArray2[ minGPUReductionDataSize ];
      if( ! Containers::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( hostArray1, deviceInput1, size ) )
         return false;
      if( deviceInput2 && !