Commit 15b5e2c4 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Moved atomicAdd function from Devices/Cuda.h into Atomic.h

parent 1743358a
Loading
Loading
Loading
Loading
+27 −2
Original line number Diff line number Diff line
@@ -17,11 +17,36 @@
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

// double-precision atomicAdd function for Maxwell and older GPUs
// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#ifdef HAVE_CUDA
#if __CUDA_ARCH__ < 600
namespace {
   __device__ double atomicAdd(double* address, double val)
   {
       unsigned long long int* address_as_ull =
                                 (unsigned long long int*)address;
       unsigned long long int old = *address_as_ull, assumed;

       do {
           assumed = old;
           old = atomicCAS(address_as_ull, assumed,
                           __double_as_longlong(val +
                                  __longlong_as_double(assumed)));

       // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
       } while (assumed != old);

       return __longlong_as_double(old);
   }
} // namespace
#endif
#endif

namespace TNL {

template< typename T, typename Device >
class Atomic
{};
class Atomic;

template< typename T >
class Atomic< T, Devices::Host >
+0 −8
Original line number Diff line number Diff line
@@ -36,14 +36,6 @@ public:
   static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
};

#ifdef HAVE_CUDA
#if __CUDA_ARCH__ < 600
namespace {
   __device__ double atomicAdd(double* address, double val);
}
#endif
#endif

} // namespace Devices
} // namespace TNL

+0 −26
Original line number Diff line number Diff line
@@ -56,31 +56,5 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
   return 1 << 20;
}

// double-precision atomicAdd function for Maxwell and older GPUs
// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#ifdef HAVE_CUDA
#if __CUDA_ARCH__ < 600
namespace {
   __device__ double atomicAdd(double* address, double val)
   {
       unsigned long long int* address_as_ull =
                                 (unsigned long long int*)address;
       unsigned long long int old = *address_as_ull, assumed;

       do {
           assumed = old;
           old = atomicCAS(address_as_ull, assumed,
                           __double_as_longlong(val +
                                  __longlong_as_double(assumed)));

       // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
       } while (assumed != old);

       return __longlong_as_double(old);
   }
} // namespace
#endif
#endif

} // namespace Devices
} // namespace TNL