Commit a6d80b09 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Adding todo note about a 'bug' in parallel reduction in CUDA.

parent cd6c5192
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -132,6 +132,8 @@ CudaReductionKernel( Operation operation,
         operation.commonReductionOnDevice( vsdata[ tid ], vsdata[ tid + 32 ] );
         //printf( "4: tid %d data %f \n", tid, sdata[ tid ] );
      }
      // TODO: If blockSize == 32, the following step does not work correctly:
      // it does not check that tid < 16 before reading vsdata[ tid + 16 ]. Fix it!
      if( blockSize >= 32 )
      {
         operation.commonReductionOnDevice( vsdata[ tid ], vsdata[ tid + 16 ] );