Commit 0447fb81 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Split the __cuda_callable__ macro into separate header

parent 864969d9
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
set (headers Cuda.h
             Cuda_impl.h
             CudaCallable.h
             CudaDeviceInfo.h
             Host.h )

+2 −7
Original line number Diff line number Diff line
/***************************************************************************
                          Devices::Cuda.h  -  description
                          Cuda.h  -  description
                             -------------------
    begin                : Nov 7, 2012
    copyright            : (C) 2012 by Tomas Oberhuber
@@ -16,6 +16,7 @@
#include <TNL/Assert.h>
#include <TNL/SmartPointersRegister.h>
#include <TNL/Timer.h>
#include <TNL/Devices/CudaCallable.h>

namespace TNL {

@@ -26,12 +27,6 @@ namespace Config {

namespace Devices {

#ifdef HAVE_CUDA
#define __cuda_callable__ __device__ __host__
#else
#define __cuda_callable__
#endif

class Cuda
{
   public:
+23 −0
Original line number Diff line number Diff line
/***************************************************************************
                          CudaCallable.h  -  description
                             -------------------
    begin                : Jun 20, 2017
    copyright            : (C) 2017 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

// The __cuda_callable__ macro has to be defined in a separate header file to
// avoid circular dependencies between #include directives.
//
// For example, the implementation of Devices::Cuda needs the TNL_ASSERT_*
// macros, which in turn need __cuda_callable__ functions.

// Function qualifier macro: expands to nothing when HAVE_CUDA is not defined,
// and to the CUDA execution space specifiers `__device__ __host__` when it is.
#ifndef HAVE_CUDA
#define __cuda_callable__
#else
#define __cuda_callable__ __device__ __host__
#endif
+6 −8
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@

#include <TNL/Devices/Cuda.h>
#include <TNL/Exceptions/CudaBadAlloc.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

namespace TNL {
namespace Devices {   
@@ -69,7 +70,6 @@ __device__ inline int Cuda::getGlobalThreadIdx_z( const dim3& gridIdx )
{
   return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
}

#endif


@@ -93,8 +93,7 @@ ObjectType* Cuda::passToDevice( const ObjectType& object )
   }
   return deviceObject;
#else
   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
   return 0;
   throw Exceptions::CudaSupportMissing();
#endif
}

@@ -110,8 +109,7 @@ ObjectType Cuda::passFromDevice( const ObjectType* object )
   TNL_CHECK_CUDA_DEVICE;
   return aux;
#else
   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
   return 0;
   throw Exceptions::CudaSupportMissing();
#endif
}

@@ -126,7 +124,7 @@ void Cuda::passFromDevice( const ObjectType* deviceObject,
               cudaMemcpyDeviceToHost );
   TNL_CHECK_CUDA_DEVICE;
#else
   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
   throw Exceptions::CudaSupportMissing();
#endif
}

@@ -148,7 +146,7 @@ void Cuda::freeFromDevice( ObjectType* deviceObject )
   cudaFree( ( void* ) deviceObject );
   TNL_CHECK_CUDA_DEVICE;
#else
   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
   throw Exceptions::CudaSupportMissing();
#endif
}

+2 −1
Original line number Diff line number Diff line
@@ -12,8 +12,9 @@

#include <cmath>
#include <type_traits>
#include <algorithm>

#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/CudaCallable.h>

#ifdef HAVE_CUDA
#include <cuda.h>