From c81cd235285bf9267888d12db3ddfaa8d039a1ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz> Date: Tue, 3 Apr 2018 10:41:03 +0200 Subject: [PATCH] Disabled CUDA device synchronization before ParallelFor2D and 3D kernels; reduced the ParallelFor2D blockSize.x cap from 256 to 128 when sizeX >= sizeY * sizeY --- src/TNL/ParallelFor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TNL/ParallelFor.h b/src/TNL/ParallelFor.h index ec66906a5e..54a3a86412 100644 --- a/src/TNL/ParallelFor.h +++ b/src/TNL/ParallelFor.h @@ -193,7 +193,7 @@ struct ParallelFor2D< Devices::Cuda > dim3 blockSize; if( sizeX >= sizeY * sizeY ) { - blockSize.x = TNL::min( 256, sizeX ); + blockSize.x = TNL::min( 128, sizeX ); blockSize.y = 1; } else if( sizeY >= sizeX * sizeX ) { @@ -212,7 +212,7 @@ struct ParallelFor2D< Devices::Cuda > gridCount.x = Devices::Cuda::getNumberOfGrids( sizeX ); gridCount.y = Devices::Cuda::getNumberOfGrids( sizeY ); - Devices::Cuda::synchronizeDevice(); +// Devices::Cuda::synchronizeDevice(); if( gridCount.x == 1 && gridCount.y == 1 ) ParallelFor2DKernel< false, false ><<< gridSize, blockSize >>> @@ -281,7 +281,7 @@ struct ParallelFor3D< Devices::Cuda > gridCount.y = Devices::Cuda::getNumberOfGrids( sizeY ); gridCount.z = Devices::Cuda::getNumberOfGrids( sizeZ ); - Devices::Cuda::synchronizeDevice(); +// Devices::Cuda::synchronizeDevice(); if( gridCount.x == 1 && gridCount.y == 1 && gridCount.z == 1 ) ParallelFor3DKernel< false, false, false ><<< gridSize, blockSize >>> -- GitLab