Commit 592f6355 authored by Jakub Klinkovský
Browse files

Small fixes in parallel reduction

parent 639df716
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -57,9 +57,9 @@ CudaReductionKernel( Operation operation,
    * gridSize is the number of elements processed by all blocks at the
    * same time.
    */
   IndexType tid = threadIdx. x;
   const IndexType tid = threadIdx.x;
         IndexType gid = blockIdx.x * blockDim. x + threadIdx.x;
   IndexType gridSize = blockDim. x * gridDim.x;
   const IndexType gridSize = blockDim.x * gridDim.x;

   sdata[ tid ] = operation.initialValue();
   /***
+7 −7
Original line number Diff line number Diff line
@@ -103,7 +103,7 @@ reduce( Operation& operation,
                                                                   deviceAux1 );
   #ifdef CUDA_REDUCTION_PROFILING
      timer.stop();
      cout << "   Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << endl;
      std::cout << "   Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl;
      timer.reset();
      timer.start();
   #endif
@@ -117,18 +117,18 @@ reduce( Operation& operation,

   #ifdef CUDA_REDUCTION_PROFILING
      timer.stop();
      cout << "   Transferring data to CPU took " << timer.getRealTime() << " sec. " << endl;
      std::cout << "   Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl;
      timer.reset();
      timer.start();
   #endif

//   cout << "resultArray = [";
//   std::cout << "resultArray = [";
//   for( int i = 0; i < n * reducedSize; i++ ) {
//      cout << resultArray[ i ];
//      std::cout << resultArray[ i ];
//      if( i < n * reducedSize - 1 )
//         cout << ", ";
//         std::cout << ", ";
//   }
//   cout << "]" << endl;
//   std::cout << "]" << std::endl;

   /***
    * Reduce the data on the host system.
@@ -138,7 +138,7 @@ reduce( Operation& operation,

   #ifdef CUDA_REDUCTION_PROFILING
      timer.stop();
      cout << "   Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << endl;
      std::cout << "   Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
   #endif

   return checkCudaDevice;
+0 −3
Original line number Diff line number Diff line
@@ -36,9 +36,6 @@ namespace Algorithms {
 */
const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256;

#ifdef HAVE_CUDA
#endif

template< typename Operation >
bool
reductionOnCudaDevice( Operation& operation,