From 5dfb78d75e3ac5de6884a30b30d67efa9396ed6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz>
Date: Tue, 16 Jul 2019 18:33:40 +0200
Subject: [PATCH] Revert "Fixed calculation of grids count in ParallelFor"

This reverts commit fa01e15be5b258386e890b6c51798d73b90eea33.
---
 src/TNL/ParallelFor.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/TNL/ParallelFor.h b/src/TNL/ParallelFor.h
index 4d5f466119..950a577bb4 100644
--- a/src/TNL/ParallelFor.h
+++ b/src/TNL/ParallelFor.h
@@ -205,7 +205,7 @@ struct ParallelFor< Devices::Cuda, Mode >
          dim3 gridSize;
          gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
 
-         if( Devices::Cuda::getNumberOfGrids( gridSize.x ) == 1 )
+         if( Devices::Cuda::getNumberOfGrids( end - start ) == 1 )
             ParallelForKernel< false ><<< gridSize, blockSize >>>( start, end, f, args... );
          else {
             // decrease the grid size and align to the number of multiprocessors
@@ -257,8 +257,8 @@ struct ParallelFor2D< Devices::Cuda, Mode >
          gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) );
 
          dim3 gridCount;
-         gridCount.x = Devices::Cuda::getNumberOfGrids( gridSize.x );
-         gridCount.y = Devices::Cuda::getNumberOfGrids( gridSize.y );
+         gridCount.x = Devices::Cuda::getNumberOfGrids( sizeX );
+         gridCount.y = Devices::Cuda::getNumberOfGrids( sizeY );
 
          if( gridCount.x == 1 && gridCount.y == 1 )
             ParallelFor2DKernel< false, false ><<< gridSize, blockSize >>>
@@ -342,9 +342,9 @@ struct ParallelFor3D< Devices::Cuda, Mode >
          gridSize.z = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) );
 
          dim3 gridCount;
-         gridCount.x = Devices::Cuda::getNumberOfGrids( gridSize.x );
-         gridCount.y = Devices::Cuda::getNumberOfGrids( gridSize.y );
-         gridCount.z = Devices::Cuda::getNumberOfGrids( gridSize.z );
+         gridCount.x = Devices::Cuda::getNumberOfGrids( sizeX );
+         gridCount.y = Devices::Cuda::getNumberOfGrids( sizeY );
+         gridCount.z = Devices::Cuda::getNumberOfGrids( sizeZ );
 
          if( gridCount.x == 1 && gridCount.y == 1 && gridCount.z == 1 )
             ParallelFor3DKernel< false, false, false ><<< gridSize, blockSize >>>
-- 
GitLab