Moved atomicAdd function from Devices/Cuda.h into Atomic.h (15b5e2c4) · Commits · TNL / tnl-dev

src/TNL/Atomic.h

+27 −2

Original line number	Diff line number	Diff line
		@@ -17,11 +17,36 @@
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>

		// double-precision atomicAdd function for Maxwell and older GPUs
		// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
		#ifdef HAVE_CUDA
		#if __CUDA_ARCH__ < 600
		namespace {
		__device__ double atomicAdd(double* address, double val)
		{
		unsigned long long int* address_as_ull =
		(unsigned long long int*)address;
		unsigned long long int old = *address_as_ull, assumed;

		do {
		assumed = old;
		old = atomicCAS(address_as_ull, assumed,
		__double_as_longlong(val +
		__longlong_as_double(assumed)));

		// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
		} while (assumed != old);

		return __longlong_as_double(old);
		}
		} // namespace
		#endif
		#endif

		namespace TNL {

		template< typename T, typename Device >
		class Atomic
		{};
		class Atomic;

		template< typename T >
		class Atomic< T, Devices::Host >

+0 −8

Original line number	Diff line number	Diff line
		@@ -36,14 +36,6 @@ public:
		static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
		};

		#ifdef HAVE_CUDA
		#if __CUDA_ARCH__ < 600
		namespace {
		__device__ double atomicAdd(double* address, double val);
		}
		#endif
		#endif

		} // namespace Devices
		} // namespace TNL

+0 −26

Original line number	Diff line number	Diff line
		@@ -56,31 +56,5 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
		return 1 << 20;
		}

		// double-precision atomicAdd function for Maxwell and older GPUs
		// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
		#ifdef HAVE_CUDA
		#if __CUDA_ARCH__ < 600
		namespace {
		__device__ double atomicAdd(double* address, double val)
		{
		unsigned long long int* address_as_ull =
		(unsigned long long int*)address;
		unsigned long long int old = *address_as_ull, assumed;

		do {
		assumed = old;
		old = atomicCAS(address_as_ull, assumed,
		__double_as_longlong(val +
		__longlong_as_double(assumed)));

		// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
		} while (assumed != old);

		return __longlong_as_double(old);
		}
		} // namespace
		#endif
		#endif

		} // namespace Devices
		} // namespace TNL