Commit 69488aac authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Moved Cuda::configSetup and Cuda::setup from Cuda.cpp to Cuda_impl.h

This will fix the bugs described in the FIXME notes.

Compilation fails due to a cyclic dependency between the headers; it will
be fixed later.
parent 674312d4
Loading
Loading
Loading
Loading
+0 −30
Original line number Diff line number Diff line
@@ -10,8 +10,6 @@

#include <TNL/Devices/Cuda.h>
#include <TNL/Math.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Config/ParameterContainer.h>
#include <TNL/Devices/CudaDeviceInfo.h>

namespace TNL {
@@ -37,34 +35,6 @@ int Cuda::getNumberOfGrids( const int blocks,
   return roundUpDivision( blocks, gridSize );
}

// Registers the "cuda-device" configuration entry (integer index of the CUDA
// device to use, default 0). Only the help text differs between CUDA and
// non-CUDA builds.
void Cuda::configSetup( Config::ConfigDescription& config,
                        const String& prefix )
{
// FIXME: HAVE_CUDA is never defined in .cpp files, so the #else branch is
// always taken here and the "(not supported on this system)" help text is
// shown even on CUDA-enabled builds. Fixing this requires moving the
// definition into a header compiled by nvcc.
#ifdef HAVE_CUDA
   config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation.", 0 );
#else
   config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 );
#endif
}

// Activates the CUDA device selected by the "cuda-device" parameter and
// resets the smart-pointer synchronization timer.
// Returns false only when the requested device cannot be activated;
// on non-CUDA builds it is a no-op returning true.
bool Cuda::setup( const Config::ParameterContainer& parameters,
                  const String& prefix )
{
// FIXME: HAVE_CUDA is never defined in .cpp files, so the whole block below
// is compiled out and setup() silently does nothing even on CUDA-enabled
// builds. Fixing this requires moving the definition into a header compiled
// by nvcc.
#ifdef HAVE_CUDA
   int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" );
   if( cudaSetDevice( cudaDevice ) != cudaSuccess )
   {
      std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl;
      return false;
   }
   smartPointersSynchronizationTimer.reset();
   smartPointersSynchronizationTimer.stop();
#endif
   return true;
}

void Cuda::insertSmartPointer( SmartPointer* pointer )
{
   smartPointersRegister.insert( pointer, Devices::CudaDeviceInfo::getActiveDevice() );
+30 −0
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
#include <TNL/Exceptions/CudaBadAlloc.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/CudaSharedMemory.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Config/ParameterContainer.h>

namespace TNL {
namespace Devices {
@@ -160,6 +162,34 @@ __device__ Element* Cuda::getSharedMemory()
   return CudaSharedMemory< Element >();
}

// Registers the "cuda-device" configuration entry (integer index of the CUDA
// device to use, default 0). The help text reflects whether this build was
// compiled with CUDA support.
inline void
Cuda::configSetup( Config::ConfigDescription& config,
                   const String& prefix )
{
   // Select the build-dependent help text once; the entry itself is
   // registered identically either way.
#ifdef HAVE_CUDA
   const char* description = "Choose CUDA device to run the computation.";
#else
   const char* description = "Choose CUDA device to run the computation (not supported on this system).";
#endif
   config.addEntry< int >( prefix + "cuda-device", description, 0 );
}

// Activates the CUDA device selected by the "cuda-device" parameter and
// resets the smart-pointer synchronization timer.
// Returns false only when the requested device cannot be activated;
// on non-CUDA builds this is a no-op that returns true.
inline bool
Cuda::setup( const Config::ParameterContainer& parameters,
             const String& prefix )
{
#ifdef HAVE_CUDA
   const int deviceId = parameters.getParameter< int >( prefix + "cuda-device" );
   const cudaError_t status = cudaSetDevice( deviceId );
   if( status != cudaSuccess )
   {
      std::cerr << "I cannot activate CUDA device number " << deviceId << "." << std::endl;
      return false;
   }
   // Start from a clean (stopped) timer for smart-pointer synchronization.
   smartPointersSynchronizationTimer.reset();
   smartPointersSynchronizationTimer.stop();
#endif
   return true;
}

// double-precision atomicAdd function for Maxwell and older GPUs
// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#if __CUDA_ARCH__ < 600