// Patch summary (leading text before the first "diff --git" header; not part of any hunk).
//
// Files touched: src/TNL/Devices/Cuda.h and src/TNL/Devices/Cuda_impl.h.
//
// 1. Cuda::getGlobalThreadIdx
//    - Before: a member template on `Index`, taking `const Index gridIdx = 0` and
//      returning `Index`; the definition multiplied gridIdx by Cuda::getMaxGridSize().
//    - After: a non-template `__device__ static inline int` taking
//      `const int gridIdx = 0` and a new `const int gridSize = getMaxGridSize()`;
//      the definition multiplies gridIdx by the gridSize argument instead, i.e.
//      ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x.
// 2. checkDevice() stub in the #else branch: whitespace-only change
//    (`return false;}` -> `return false; }`).
// 3. Two empty lines before the closing `};` of class Cuda are removed.
//
// NOTE(review): the template parameter is removed, so any call site written with
//   explicit template arguments (getGlobalThreadIdx< Index >( ... )) would no longer
//   compile -- call sites are not visible in this patch; verify.
// NOTE(review): parameters and return type are now fixed to `int`; callers that relied
//   on a wider Index type would be limited to the int range -- TODO confirm none exist.
// NOTE(review): the new default argument calls getMaxGridSize() from a __device__
//   function; its qualifiers are not shown here -- confirm it is usable in device code.
// NOTE(review): all hunks below appear on a single physical line; presumably the line
//   breaks were lost in transit -- restore them before trying to apply this patch.
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index ff7471137fbac71b82c274c44dc2137aa099d7cb..71a372c9f80dd47276e7d88407470d449b00cb43 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -49,8 +49,9 @@ class Cuda static inline constexpr int getGPUTransferBufferSize(); #ifdef HAVE_CUDA - template< typename Index > - __device__ static Index getGlobalThreadIdx( const Index gridIdx = 0 ); + __device__ static inline int + getGlobalThreadIdx( const int gridIdx = 0, + const int gridSize = getMaxGridSize() ); #endif static int getNumberOfBlocks( const int threads, @@ -111,7 +112,7 @@ class Cuda */ static bool checkDevice( const char* file_name, int line, cudaError error ); #else - static bool checkDevice() { return false;}; + static bool checkDevice() { return false; }; #endif static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); @@ -132,8 +133,6 @@ class Cuda protected: static SmartPointersRegister smartPointersRegister; - - }; #ifdef HAVE_CUDA diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index 2b9cc4de2dc892b523b40458a854bb1d64b5d277..3dd7b45cc20cb957cbbd0f2d7c57f83dcce8a627 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -49,10 +49,9 @@ inline constexpr int Cuda::getGPUTransferBufferSize() } #ifdef HAVE_CUDA -template< typename Index > -__device__ Index Cuda::getGlobalThreadIdx( const Index gridIdx ) +__device__ inline int Cuda::getGlobalThreadIdx( const int gridIdx, const int gridSize ) { - return ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x; } #endif