Skip to content
Snippets Groups Projects
Commit 15b5e2c4 authored by Jakub Klinkovský
Browse files

Moved atomicAdd function from Devices/Cuda.h into Atomic.h

parent 1743358a
No related branches found
No related tags found
1 merge request: !42 "Refactoring for execution policies"
......@@ -17,11 +17,36 @@
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
// double-precision atomicAdd function for Maxwell and older GPUs
// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#ifdef HAVE_CUDA
#if __CUDA_ARCH__ < 600
namespace {
/**
 * Double-precision atomicAdd emulated with a 64-bit compare-and-swap loop
 * (variant published in the CUDA C Programming Guide, section on atomic
 * functions). Intended for Maxwell and older GPUs.
 *
 * \param address  location of the double value to be updated
 * \param val      value added to the content of \e address
 * \return the value that was stored at \e address before the successful update
 */
__device__ double atomicAdd(double* address, double val)
{
   // View the double's storage as a 64-bit integer so that atomicCAS can be used.
   unsigned long long int* const bits = (unsigned long long int*) address;
   unsigned long long int observed = *bits;
   unsigned long long int expected;
   do {
      expected = observed;
      const double sum = val + __longlong_as_double(expected);
      // atomicCAS returns the word it found at the address; the swap took
      // place only if that word equals `expected`.
      observed = atomicCAS(bits, expected, __double_as_longlong(sum));
      // The exit test compares raw integer bit patterns rather than doubles,
      // which avoids a hang in case of NaN (since NaN != NaN).
   } while (expected != observed);
   return __longlong_as_double(observed);
}
} // namespace
#endif
#endif
namespace TNL {
template< typename T, typename Device >
class Atomic
{};
class Atomic;
template< typename T >
class Atomic< T, Devices::Host >
......
......@@ -36,14 +36,6 @@ public:
static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
};
#ifdef HAVE_CUDA
#if __CUDA_ARCH__ < 600
namespace {
// Forward declaration of a double-precision atomicAdd overload. It sits in an
// anonymous namespace and is only compiled under the surrounding
// HAVE_CUDA / __CUDA_ARCH__ < 600 preprocessor guards.
// NOTE(review): the matching definition is not visible next to this
// declaration - presumably it lives in the accompanying implementation
// header; confirm before relying on it.
__device__ double atomicAdd(double* address, double val);
}
#endif
#endif
} // namespace Devices
} // namespace TNL
......
......@@ -56,31 +56,5 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
return 1 << 20;
}
// double-precision atomicAdd function for Maxwell and older GPUs
// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#ifdef HAVE_CUDA
#if __CUDA_ARCH__ < 600
namespace {
/**
 * Double-precision atomicAdd emulated with a 64-bit compare-and-swap loop
 * (variant published in the CUDA C Programming Guide, section on atomic
 * functions). Intended for Maxwell and older GPUs.
 *
 * \param address  location of the double value to be updated
 * \param val      value added to the content of \e address
 * \return the value that was stored at \e address before the successful update
 */
__device__ double atomicAdd(double* address, double val)
{
   // View the double's storage as a 64-bit integer so that atomicCAS can be used.
   unsigned long long int* const bits = (unsigned long long int*) address;
   unsigned long long int observed = *bits;
   unsigned long long int expected;
   do {
      expected = observed;
      const double sum = val + __longlong_as_double(expected);
      // atomicCAS returns the word it found at the address; the swap took
      // place only if that word equals `expected`.
      observed = atomicCAS(bits, expected, __double_as_longlong(sum));
      // The exit test compares raw integer bit patterns rather than doubles,
      // which avoids a hang in case of NaN (since NaN != NaN).
   } while (expected != observed);
   return __longlong_as_double(observed);
}
} // namespace
#endif
#endif
} // namespace Devices
} // namespace TNL
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment