Loading src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h +9 −8 Original line number Diff line number Diff line Loading @@ -231,6 +231,7 @@ struct CudaPrefixSumKernelLauncher output, auxArray1.getData(), gridShift ); cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; Loading Loading @@ -260,6 +261,7 @@ struct CudaPrefixSumKernelLauncher gridShift, auxArray2.getData(), output ); cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; cudaMemcpy( &gridShift, Loading Loading @@ -323,11 +325,10 @@ struct CudaPrefixSumKernelLauncher gridShift, &deviceInput[ gridOffset ], &deviceOutput[ gridOffset ] ); TNL_CHECK_CUDA_DEVICE; } /*** * Store the number of CUDA grids for a purpose of unit testing, i.e. * Store the number of CUDA grids for the purpose of unit testing, i.e. * to check if we test the algorithm with more than one CUDA grid. */ gridsCount = numberOfGrids; Loading Loading
src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h +9 −8 Original line number Diff line number Diff line Loading @@ -231,6 +231,7 @@ struct CudaPrefixSumKernelLauncher output, auxArray1.getData(), gridShift ); cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; Loading Loading @@ -260,6 +261,7 @@ struct CudaPrefixSumKernelLauncher gridShift, auxArray2.getData(), output ); cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; cudaMemcpy( &gridShift, Loading Loading @@ -323,11 +325,10 @@ struct CudaPrefixSumKernelLauncher gridShift, &deviceInput[ gridOffset ], &deviceOutput[ gridOffset ] ); TNL_CHECK_CUDA_DEVICE; } /*** * Store the number of CUDA grids for a purpose of unit testing, i.e. * Store the number of CUDA grids for the purpose of unit testing, i.e. * to check if we test the algorithm with more than one CUDA grid. */ gridsCount = numberOfGrids; Loading