Skip to content
Snippets Groups Projects
Commit 5b9f09ad authored by Jakub Klinkovský
Browse files

Added include of Math.h into ParallelFor.h

parent d1c11df4
No related branches found
No related tags found
No related merge requests found
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <TNL/Devices/Host.h> #include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h> #include <TNL/Devices/Cuda.h>
#include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Devices/CudaDeviceInfo.h>
#include <TNL/Math.h>
/* /*
* The implementation of ParallelFor is not meant to provide maximum performance * The implementation of ParallelFor is not meant to provide maximum performance
...@@ -72,7 +73,7 @@ struct ParallelFor< Devices::Cuda > ...@@ -72,7 +73,7 @@ struct ParallelFor< Devices::Cuda >
dim3 blockSize( 256 ); dim3 blockSize( 256 );
dim3 gridSize; dim3 gridSize;
const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); gridSize.x = TNL::min( desGridSize, Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
Devices::Cuda::synchronizeDevice(); Devices::Cuda::synchronizeDevice();
ParallelForKernel<<< gridSize, blockSize >>>( start, end, f, args... ); ParallelForKernel<<< gridSize, blockSize >>>( start, end, f, args... );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment