From 14af6ad7e3af75166337063de6db365681868b2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz>
Date: Sat, 19 Nov 2016 14:31:08 +0100
Subject: [PATCH] Fixed Cuda::getGlobalThreadIdx method

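The method is no longer a template: it now takes and returns int, and the
grid size that multiplies gridIdx is passed as a new gridSize parameter
(defaulting to getMaxGridSize()) instead of being hard-coded.

Fortunately it's not used anywhere at the moment...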
---
 src/TNL/Devices/Cuda.h      | 9 ++++-----
 src/TNL/Devices/Cuda_impl.h | 5 ++---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index ff7471137f..71a372c9f8 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -49,8 +49,9 @@ class Cuda
    static inline constexpr int getGPUTransferBufferSize();
 
 #ifdef HAVE_CUDA
-   template< typename Index >
-   __device__ static Index getGlobalThreadIdx( const Index gridIdx = 0 );
+   __device__ static inline int
+   getGlobalThreadIdx( const int gridIdx = 0,
+                       const int gridSize = getMaxGridSize() );
 #endif
 
    static int getNumberOfBlocks( const int threads,
@@ -111,7 +112,7 @@ class Cuda
     */
    static bool checkDevice( const char* file_name, int line, cudaError error );
 #else
-   static bool checkDevice() { return false;};
+   static bool checkDevice() { return false; };
 #endif
    
    static void configSetup( Config::ConfigDescription& config, const String& prefix = "" );
@@ -132,8 +133,6 @@ class Cuda
    protected:
    
    static SmartPointersRegister smartPointersRegister;
-   
-   
 };
 
 #ifdef HAVE_CUDA
diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h
index 2b9cc4de2d..3dd7b45cc2 100644
--- a/src/TNL/Devices/Cuda_impl.h
+++ b/src/TNL/Devices/Cuda_impl.h
@@ -49,10 +49,9 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
 }
 
 #ifdef HAVE_CUDA
-template< typename Index >
-__device__ Index Cuda::getGlobalThreadIdx( const Index gridIdx )
+__device__ inline int Cuda::getGlobalThreadIdx( const int gridIdx, const int gridSize )
 {
-   return ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x;
 }
 #endif
 
-- 
GitLab