Skip to content
Snippets Groups Projects
Commit 43c2a7e1 authored by Jakub Klinkovský
Browse files

Added missing synchronizations after CUDA kernel launches in ArrayOperations

parent 96788ec2
No related branches found
No related tags found
No related merge requests found
@@ -108,10 +108,11 @@ setMemory( Element* data,
 TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
 #ifdef HAVE_CUDA
 dim3 blockSize( 0 ), gridSize( 0 );
-blockSize. x = 256;
-Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize. x );
-gridSize. x = TNL::min( blocksNumber, Devices::Cuda::getMaxGridSize() );
+blockSize.x = 256;
+Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize.x );
+gridSize.x = TNL::min( blocksNumber, Devices::Cuda::getMaxGridSize() );
 setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );
+cudaStreamSynchronize(0);
 TNL_CHECK_CUDA_DEVICE;
 #else
 throw Exceptions::CudaSupportMissing();
@@ -160,10 +161,11 @@ copyMemory( DestinationElement* destination,
 else
 {
 dim3 blockSize( 0 ), gridSize( 0 );
-blockSize. x = 256;
-Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize. x );
-gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
+blockSize.x = 256;
+Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize.x );
+gridSize.x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
 copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
+cudaStreamSynchronize(0);
 TNL_CHECK_CUDA_DEVICE;
 }
 #else
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment