From 15b5e2c40d9c1a68518c0f4408bb4ecdd1b56a36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 22 Aug 2019 19:31:20 +0200
Subject: [PATCH] Moved atomicAdd function from Devices/Cuda.h into Atomic.h

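The double-precision atomicAdd overload moved here is the standard
atomicCAS-based emulation from the CUDA C Programming Guide. It is only
needed on GPUs with compute capability below 6.0 (Maxwell and older), which
provide no native atomicAdd(double*, double), hence the __CUDA_ARCH__ < 600
guard. A minimal sketch of a kernel that would pick up this fallback on such
devices (the kernel and its parameters are illustrative, not part of this
patch):

   __global__ void sumKernel( const double* data, int size, double* result )
   {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if( i < size )
         // resolves to the CAS-based overload when __CUDA_ARCH__ < 600
         atomicAdd( result, data[ i ] );
   }
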
---
 src/TNL/Atomic.h            | 29 +++++++++++++++++++++++++++--
 src/TNL/Devices/Cuda.h      |  8 --------
 src/TNL/Devices/Cuda_impl.h | 26 --------------------------
 3 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h
index 4855b8f90e..e84236287c 100644
--- a/src/TNL/Atomic.h
+++ b/src/TNL/Atomic.h
@@ -17,11 +17,36 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
+// double-precision atomicAdd function for Maxwell and older GPUs
+// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
+#ifdef HAVE_CUDA
+#if __CUDA_ARCH__ < 600
+namespace {
+   __device__ double atomicAdd(double* address, double val)
+   {
+       unsigned long long int* address_as_ull =
+                                 (unsigned long long int*)address;
+       unsigned long long int old = *address_as_ull, assumed;
+
+       do {
+           assumed = old;
+           old = atomicCAS(address_as_ull, assumed,
+                           __double_as_longlong(val +
+                                  __longlong_as_double(assumed)));
+
+       // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+       } while (assumed != old);
+
+       return __longlong_as_double(old);
+   }
+} // namespace
+#endif
+#endif
+
 namespace TNL {
 
 template< typename T, typename Device >
-class Atomic
-{};
+class Atomic;
 
 template< typename T >
 class Atomic< T, Devices::Host >
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index fc924ccc9f..6784da34d9 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -36,14 +36,6 @@ public:
    static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
 };
 
-#ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-namespace {
-   __device__ double atomicAdd(double* address, double val);
-}
-#endif
-#endif
-
 } // namespace Devices
 } // namespace TNL
 
diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h
index 7a4d59fcc0..5109f689e8 100644
--- a/src/TNL/Devices/Cuda_impl.h
+++ b/src/TNL/Devices/Cuda_impl.h
@@ -56,31 +56,5 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
    return 1 << 20;
 }
 
-// double-precision atomicAdd function for Maxwell and older GPUs
-// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
-#ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-namespace {
-   __device__ double atomicAdd(double* address, double val)
-   {
-       unsigned long long int* address_as_ull =
-                                 (unsigned long long int*)address;
-       unsigned long long int old = *address_as_ull, assumed;
-
-       do {
-           assumed = old;
-           old = atomicCAS(address_as_ull, assumed,
-                           __double_as_longlong(val +
-                                  __longlong_as_double(assumed)));
-
-       // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-       } while (assumed != old);
-
-       return __longlong_as_double(old);
-   }
-} // namespace
-#endif
-#endif
-
 } // namespace Devices
 } // namespace TNL
-- 
GitLab