diff --git a/src/TNL/Devices/Cuda.cpp b/src/TNL/Devices/Cuda.cpp
index 11301ef8687534f30202526bdfd59a06d142f530..30ce841c4c6b428590325d9b5a31cf7500c1c595 100644
--- a/src/TNL/Devices/Cuda.cpp
+++ b/src/TNL/Devices/Cuda.cpp
@@ -26,11 +26,6 @@ String Cuda::getDeviceType()
    return String( "Cuda" );
 }
 
-int Cuda::getGPUTransferBufferSize()
-{
-   return 1 << 20;
-}
-
 int Cuda::getNumberOfBlocks( const int threads,
                              const int blockSize )
 {
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index 4a09f0cd81c6028199bde20750034a28418f6e4c..b83e0bb119c5e8b03b4dff1497f5892b7cfd28e8 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -38,11 +38,15 @@ class Cuda
 
    static String getDeviceType();
 
-   __cuda_callable__ static inline int getMaxGridSize();
+   __cuda_callable__ static inline constexpr int getMaxGridSize();
 
-   __cuda_callable__ static inline int getMaxBlockSize();
+   __cuda_callable__ static inline constexpr int getMaxBlockSize();
 
-   __cuda_callable__ static inline int getWarpSize();
+   __cuda_callable__ static inline constexpr int getWarpSize();
+
+   __cuda_callable__ static inline constexpr int getNumberOfSharedMemoryBanks();
+
+   static inline constexpr int getGPUTransferBufferSize();
 
 #ifdef HAVE_CUDA
    static int getDeviceId();
@@ -51,10 +55,6 @@ class Cuda
    __device__ static Index getGlobalThreadIdx( const Index gridIdx = 0 );
 #endif
 
-   __cuda_callable__ static inline int getNumberOfSharedMemoryBanks();
-
-   static int getGPUTransferBufferSize();
-
    static int getNumberOfBlocks( const int threads,
                                  const int blockSize );
 
diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h
index 580653b708ec95ba88e312c7a492eaafd5483757..2b9cc4de2dc892b523b40458a854bb1d64b5d277 100644
--- a/src/TNL/Devices/Cuda_impl.h
+++ b/src/TNL/Devices/Cuda_impl.h
@@ -16,26 +16,38 @@ namespace TNL {
 namespace Devices {
 
 __cuda_callable__
-inline int Cuda::getMaxGridSize()
+inline constexpr int Cuda::getMaxGridSize()
 {
    // TODO: make it preprocessor macro constant defined in tnlConfig
    return 65535;
 };
 
 __cuda_callable__
-inline int Cuda::getMaxBlockSize()
+inline constexpr int Cuda::getMaxBlockSize()
 {
    // TODO: make it preprocessor macro constant defined in tnlConfig
    return 1024;
 };
 
 __cuda_callable__
-inline int Cuda::getWarpSize()
+inline constexpr int Cuda::getWarpSize()
 {
    // TODO: make it preprocessor macro constant defined in tnlConfig
    return 32;
 }
 
+__cuda_callable__
+inline constexpr int Cuda::getNumberOfSharedMemoryBanks()
+{
+   // TODO: make it preprocessor macro constant defined in tnlConfig
+   return 32;
+}
+
+inline constexpr int Cuda::getGPUTransferBufferSize()
+{
+   return 1 << 20;
+}
+
 #ifdef HAVE_CUDA
 template< typename Index >
 __device__ Index Cuda::getGlobalThreadIdx( const Index gridIdx )
@@ -45,13 +57,6 @@ __device__ Index Cuda::getGlobalThreadIdx( const Index gridIdx )
 #endif
 
-__cuda_callable__
-inline int Cuda::getNumberOfSharedMemoryBanks()
-{
-   // TODO: make it preprocessor macro constant defined in tnlConfig
-   return 32;
-}
-
 template< typename ObjectType >
 ObjectType* Cuda::passToDevice( const ObjectType& object )
 {
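
The practical effect of this change is that the hard-coded device constants become usable in constant expressions. Below is a minimal host-side sketch (not part of the patch) of what that enables, assuming `TNL/Devices/Cuda.h` also pulls in `Cuda_impl.h` as TNL's `*_impl.h` headers normally are; the names `kMaxWarpsPerBlock` and `stagingBuffer` are made up for illustration.

```cpp
// Illustration only -- uses the constexpr getters introduced in the diff above.
#include <array>
#include <iostream>
#include <TNL/Devices/Cuda.h>

using TNL::Devices::Cuda;

// The getters can now appear in constant expressions, e.g. compile-time checks...
static_assert( Cuda::getMaxBlockSize() % Cuda::getWarpSize() == 0,
               "the maximum block size must be a multiple of the warp size" );

// ...and as template arguments or array bounds evaluated at compile time.
constexpr int kMaxWarpsPerBlock = Cuda::getMaxBlockSize() / Cuda::getWarpSize();
static std::array< char, Cuda::getGPUTransferBufferSize() > stagingBuffer;  // 1 MiB

int main()
{
   std::cout << "warps per block: " << kMaxWarpsPerBlock << "\n"
             << "staging buffer:  " << stagingBuffer.size() << " bytes\n";
}
```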