From f77a6d82ac95538cec405e7fe19f1c127806b53b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz> Date: Sat, 19 Nov 2016 14:12:51 +0100 Subject: [PATCH] Added constexpr to some Cuda methods --- src/TNL/Devices/Cuda.cpp | 5 ----- src/TNL/Devices/Cuda.h | 14 +++++++------- src/TNL/Devices/Cuda_impl.h | 25 +++++++++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/TNL/Devices/Cuda.cpp b/src/TNL/Devices/Cuda.cpp index 11301ef868..30ce841c4c 100644 --- a/src/TNL/Devices/Cuda.cpp +++ b/src/TNL/Devices/Cuda.cpp @@ -26,11 +26,6 @@ String Cuda::getDeviceType() return String( "Cuda" ); } -int Cuda::getGPUTransferBufferSize() -{ - return 1 << 20; -} - int Cuda::getNumberOfBlocks( const int threads, const int blockSize ) { diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 4a09f0cd81..b83e0bb119 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -38,11 +38,15 @@ class Cuda static String getDeviceType(); - __cuda_callable__ static inline int getMaxGridSize(); + __cuda_callable__ static inline constexpr int getMaxGridSize(); - __cuda_callable__ static inline int getMaxBlockSize(); + __cuda_callable__ static inline constexpr int getMaxBlockSize(); - __cuda_callable__ static inline int getWarpSize(); + __cuda_callable__ static inline constexpr int getWarpSize(); + + __cuda_callable__ static inline constexpr int getNumberOfSharedMemoryBanks(); + + static inline constexpr int getGPUTransferBufferSize(); #ifdef HAVE_CUDA static int getDeviceId(); @@ -51,10 +55,6 @@ class Cuda __device__ static Index getGlobalThreadIdx( const Index gridIdx = 0 ); #endif - __cuda_callable__ static inline int getNumberOfSharedMemoryBanks(); - - static int getGPUTransferBufferSize(); - static int getNumberOfBlocks( const int threads, const int blockSize ); diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index 580653b708..2b9cc4de2d 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -16,26 +16,38 @@ namespace TNL { namespace Devices { __cuda_callable__ -inline int Cuda::getMaxGridSize() +inline constexpr int Cuda::getMaxGridSize() { // TODO: make it preprocessor macro constant defined in tnlConfig return 65535; }; __cuda_callable__ -inline int Cuda::getMaxBlockSize() +inline constexpr int Cuda::getMaxBlockSize() { // TODO: make it preprocessor macro constant defined in tnlConfig return 1024; }; __cuda_callable__ -inline int Cuda::getWarpSize() +inline constexpr int Cuda::getWarpSize() { // TODO: make it preprocessor macro constant defined in tnlConfig return 32; } +__cuda_callable__ +inline constexpr int Cuda::getNumberOfSharedMemoryBanks() +{ + // TODO: make it preprocessor macro constant defined in tnlConfig + return 32; +} + +inline constexpr int Cuda::getGPUTransferBufferSize() +{ + return 1 << 20; +} + #ifdef HAVE_CUDA template< typename Index > __device__ Index Cuda::getGlobalThreadIdx( const Index gridIdx ) @@ -45,13 +57,6 @@ __device__ Index Cuda::getGlobalThreadIdx( const Index gridIdx ) #endif -__cuda_callable__ -inline int Cuda::getNumberOfSharedMemoryBanks() -{ - // TODO: make it preprocessor macro constant defined in tnlConfig - return 32; -} - template< typename ObjectType > ObjectType* Cuda::passToDevice( const ObjectType& object ) { -- GitLab