Commit af6d1d6b authored by Jakub Klinkovský
Browse files

Added default stream synchronizations after kernel launches in CudaPrefixSumKernel.h

parent 8d0d2638
Loading
Loading
Loading
Loading
+9 −8
Original line number Diff line number Diff line
@@ -231,6 +231,7 @@ struct CudaPrefixSumKernelLauncher
           output,
           auxArray1.getData(),
           gridShift );
      cudaStreamSynchronize(0);
      TNL_CHECK_CUDA_DEVICE;


@@ -260,6 +261,7 @@ struct CudaPrefixSumKernelLauncher
           gridShift,
           auxArray2.getData(),
           output );
      cudaStreamSynchronize(0);
      TNL_CHECK_CUDA_DEVICE;

      cudaMemcpy( &gridShift,
@@ -323,11 +325,10 @@ struct CudaPrefixSumKernelLauncher
            gridShift,
            &deviceInput[ gridOffset ],
            &deviceOutput[ gridOffset ] );
         TNL_CHECK_CUDA_DEVICE;
      }

      /***
-       * Store the number of CUDA grids for a purpose of unit testing, i.e.
+       * Store the number of CUDA grids for the purpose of unit testing, i.e.
       * to check if we test the algorithm with more than one CUDA grid.
       */
      gridsCount = numberOfGrids;