From 15b5e2c40d9c1a68518c0f4408bb4ecdd1b56a36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 22 Aug 2019 19:31:20 +0200
Subject: [PATCH] Moved atomicAdd function from Devices/Cuda.h into Atomic.h

---
 src/TNL/Atomic.h            | 29 +++++++++++++++++++++++++++--
 src/TNL/Devices/Cuda.h      |  8 --------
 src/TNL/Devices/Cuda_impl.h | 26 --------------------------
 3 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h
index 4855b8f90e..e84236287c 100644
--- a/src/TNL/Atomic.h
+++ b/src/TNL/Atomic.h
@@ -17,11 +17,36 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
+// double-precision atomicAdd function for Maxwell and older GPUs
+// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
+#ifdef HAVE_CUDA
+#if __CUDA_ARCH__ < 600
+namespace {
+   __device__ double atomicAdd(double* address, double val)
+   {
+      unsigned long long int* address_as_ull =
+                          (unsigned long long int*)address;
+      unsigned long long int old = *address_as_ull, assumed;
+
+      do {
+         assumed = old;
+         old = atomicCAS(address_as_ull, assumed,
+                         __double_as_longlong(val +
+                                __longlong_as_double(assumed)));
+
+      // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+      } while (assumed != old);
+
+      return __longlong_as_double(old);
+   }
+} // namespace
+#endif
+#endif
+
 namespace TNL {
 
 template< typename T, typename Device >
-class Atomic
-{};
+class Atomic;
 
 template< typename T >
 class Atomic< T, Devices::Host >
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index fc924ccc9f..6784da34d9 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -36,14 +36,6 @@ public:
    static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
 };
 
-#ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-namespace {
-   __device__ double atomicAdd(double* address, double val);
-}
-#endif
-#endif
-
 } // namespace Devices
 } // namespace TNL
 
diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h
index 7a4d59fcc0..5109f689e8 100644
--- a/src/TNL/Devices/Cuda_impl.h
+++ b/src/TNL/Devices/Cuda_impl.h
@@ -56,31 +56,5 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
    return 1 << 20;
 }
 
-// double-precision atomicAdd function for Maxwell and older GPUs
-// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
-#ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-namespace {
-   __device__ double atomicAdd(double* address, double val)
-   {
-      unsigned long long int* address_as_ull =
-                          (unsigned long long int*)address;
-      unsigned long long int old = *address_as_ull, assumed;
-
-      do {
-         assumed = old;
-         old = atomicCAS(address_as_ull, assumed,
-                         __double_as_longlong(val +
-                                __longlong_as_double(assumed)));
-
-      // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-      } while (assumed != old);
-
-      return __longlong_as_double(old);
-   }
-} // namespace
-#endif
-#endif
-
 } // namespace Devices
 } // namespace TNL
-- 
GitLab
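
A minimal usage sketch (not part of the patch) of how the relocated fallback gets exercised: a kernel accumulating into a double with atomicAdd. The include of <TNL/Atomic.h>, the -DHAVE_CUDA build flag, and the kernel/driver names are assumptions for illustration only. When compiled for __CUDA_ARCH__ < 600, overload resolution picks the anonymous-namespace CAS-loop fallback added to Atomic.h above; on sm_60 and newer the native double-precision atomicAdd instruction is used instead.

// atomic_add_sketch.cu -- hypothetical example, not part of this patch.
// Assumed build line: nvcc -DHAVE_CUDA -arch=sm_52 atomic_add_sketch.cu
#include <cstdio>
#include <TNL/Atomic.h>   // assumed include path; provides the pre-sm_60 double atomicAdd fallback

__global__ void sumKernel( const double* data, int size, double* result )
{
   int i = blockIdx.x * blockDim.x + threadIdx.x;
   if( i < size )
      // CAS-loop fallback on Maxwell and older, native instruction on Pascal and newer
      atomicAdd( result, data[ i ] );
}

int main()
{
   const int size = 1 << 20;
   double* data;
   double* result;
   cudaMallocManaged( &data, size * sizeof( double ) );
   cudaMallocManaged( &result, sizeof( double ) );
   for( int i = 0; i < size; i++ )
      data[ i ] = 1.0;
   *result = 0.0;

   sumKernel<<< ( size + 255 ) / 256, 256 >>>( data, size, result );
   cudaDeviceSynchronize();

   printf( "sum = %f (expected %d)\n", *result, size );
   cudaFree( data );
   cudaFree( result );
   return 0;
}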