Commit 1743358a authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Moved synchronization of smart pointers from Devices::Cuda into TNL::Pointers...

Moved synchronization of smart pointers from Devices::Cuda into TNL::Pointers namespace as free functions

synchronizeDevice() was renamed to synchronizeSmartPointersOnDevice()
for clarity — CUDA itself has many similarly named functions (e.g.
cudaDeviceSynchronize()), so the new name avoids confusion.
parent 2d5176fb
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -490,7 +490,7 @@ getExplicitUpdate( const RealType& time,
         
         //std::cerr << "Setting boundary conditions..." << std::endl;

         Devices::Cuda::synchronizeDevice();
         Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
         for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
            for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
               boundaryConditionsTemplatedCompact< MeshType, CellType, BoundaryCondition, MeshFunctionType >
@@ -594,7 +594,7 @@ getExplicitUpdate( const RealType& time,
                               gridYSize / 16 + ( gridYSize % 16 != 0 ) );
            */

            TNL::Devices::Cuda::synchronizeDevice();
            Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
            int cudaErr;
            Meshes::Traverser< MeshType, Cell > meshTraverser;
            meshTraverser.template processInteriorEntities< UserData,
+2 −2
Original line number Diff line number Diff line
@@ -246,7 +246,7 @@ processEntities(
      IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 );
      Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
      dim3 gridIdx, cudaGridSize;
      Devices::Cuda::synchronizeDevice();
      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
      for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ )
      {
         Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
@@ -273,7 +273,7 @@ processEntities(
      auto& pool = Cuda::StreamPool::getInstance();
      const cudaStream_t& s = pool.getStream( stream );

      Devices::Cuda::synchronizeDevice();
      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
      dim3 gridIdx, cudaGridSize;
      for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ )
         for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ )
+1 −1
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ benchmarkIterativeSolvers( Benchmark& benchmark,
   *cudaMatrixPointer = *matrixPointer;

   // synchronize shared pointers
   Devices::Cuda::synchronizeDevice();
   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
#endif

   using namespace Solvers::Linear;
+0 −21
Original line number Diff line number Diff line
@@ -10,13 +10,7 @@

#pragma once

#include <iostream>

#include <TNL/String.h>
#include <TNL/Assert.h>
#include <TNL/Pointers/SmartPointersRegister.h>
#include <TNL/Timer.h>
#include <TNL/Cuda/CudaCallable.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Config/ParameterContainer.h>

@@ -33,16 +27,6 @@ public:

   static inline constexpr int getGPUTransferBufferSize();

   static inline void insertSmartPointer( Pointers::SmartPointer* pointer );

   static inline void removeSmartPointer( Pointers::SmartPointer* pointer );

   // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be
   // called to get the device ID.
   static inline bool synchronizeDevice( int deviceId = -1 );

   static inline Timer& getSmartPointersSynchronizationTimer();

   ////
   // When we transfer data between the GPU and the CPU we use 5 MB buffer. This
   // size should ensure good performance -- see.
@@ -50,11 +34,6 @@ public:
   // We use the same buffer size even for retyping data during IO operations.
   //
   static constexpr std::size_t TransferBufferSize = 5 * 2<<20;


   protected:

   static inline Pointers::SmartPointersRegister& getSmartPointersRegister();
};

#ifdef HAVE_CUDA
+5 −38
Original line number Diff line number Diff line
@@ -10,12 +10,15 @@

#pragma once

#include <iostream>

#include <TNL/Math.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Cuda/DeviceInfo.h>
#include <TNL/Exceptions/CudaBadAlloc.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/Exceptions/CudaRuntimeError.h>
#include <TNL/Pointers/SmartPointersRegister.h>

namespace TNL {
namespace Devices {
@@ -42,8 +45,8 @@ Cuda::setup( const Config::ParameterContainer& parameters,
      std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl;
      return false;
   }
   getSmartPointersSynchronizationTimer().reset();
   getSmartPointersSynchronizationTimer().stop();
   Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().reset();
   Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().stop();
#endif
   return true;
}
@@ -53,42 +56,6 @@ inline constexpr int Cuda::getGPUTransferBufferSize()
   return 1 << 20;
}

inline void Cuda::insertSmartPointer( Pointers::SmartPointer* pointer )
{
   getSmartPointersRegister().insert( pointer, TNL::Cuda::DeviceInfo::getActiveDevice() );
}

inline void Cuda::removeSmartPointer( Pointers::SmartPointer* pointer )
{
   getSmartPointersRegister().remove( pointer, TNL::Cuda::DeviceInfo::getActiveDevice() );
}

inline bool Cuda::synchronizeDevice( int deviceId )
{
#ifdef HAVE_CUDA
   if( deviceId < 0 )
      deviceId = TNL::Cuda::DeviceInfo::getActiveDevice();
   getSmartPointersSynchronizationTimer().start();
   bool b = getSmartPointersRegister().synchronizeDevice( deviceId );
   getSmartPointersSynchronizationTimer().stop();
   return b;
#else
   return true;
#endif
}

inline Timer& Cuda::getSmartPointersSynchronizationTimer()
{
   static Timer timer;
   return timer;
}

inline Pointers::SmartPointersRegister& Cuda::getSmartPointersRegister()
{
   static Pointers::SmartPointersRegister reg;
   return reg;
}

// double-precision atomicAdd function for Maxwell and older GPUs
// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#ifdef HAVE_CUDA
Loading