Commit da7148c4 authored by Jakub Klinkovský
Browse files

Added global synchronization after the launch of ParallelForKernel

parent 36b81712
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -76,6 +76,8 @@ struct ParallelFor< Devices::Cuda >

         Devices::Cuda::synchronizeDevice();
         ParallelForKernel<<< gridSize, blockSize >>>( start, end, f, args... );
         cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
      }
#endif
   }