Split the __cuda_callable__ macro into separate header (0447fb81) · Commits · TNL / tnl-dev

src/TNL/Devices/CMakeLists.txt

+1 −0

+2 −7

Original line number	Diff line number	Diff line
		/***************************************************************************
		Devices::Cuda.h - description
		Cuda.h - description
		-------------------
		begin : Nov 7, 2012
		copyright : (C) 2012 by Tomas Oberhuber
		@@ -16,6 +16,7 @@
		#include <TNL/Assert.h>
		#include <TNL/SmartPointersRegister.h>
		#include <TNL/Timer.h>
		#include <TNL/Devices/CudaCallable.h>

		namespace TNL {

		@@ -26,12 +27,6 @@ namespace Config {

		namespace Devices {

		#ifdef HAVE_CUDA
		#define __cuda_callable__ __device__ __host__
		#else
		#define __cuda_callable__
		#endif

		class Cuda
		{
		public:

0 → 100644

+23 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		CudaCallable.h - description
		-------------------
		begin : Jun 20, 2017
		copyright : (C) 2017 by Tomas Oberhuber et al.
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		// The __cuda_callable__ macro has to be defined in a separate header file
		// to avoid circular #include dependencies.
		//
		// For example, the implementation of Devices::Cuda needs the TNL_ASSERT_*
		// macros, which in turn need __cuda_callable__ functions.

		// __cuda_callable__ is a function qualifier: put it in front of a function
		// declaration that should be usable from both host and device code.
		#ifdef HAVE_CUDA
		// CUDA build: expands to the CUDA execution space specifiers, so the
		// annotated function is compiled for both the host and the device.
		#define __cuda_callable__ __device__ __host__
		#else
		// Build without CUDA: expands to nothing, so the annotated function is an
		// ordinary host function and the code compiles with a plain C++ compiler.
		#define __cuda_callable__
		#endif

+6 −8

Original line number	Diff line number	Diff line
		@@ -12,6 +12,7 @@

		#include <TNL/Devices/Cuda.h>
		#include <TNL/Exceptions/CudaBadAlloc.h>
		#include <TNL/Exceptions/CudaSupportMissing.h>

		namespace TNL {
		namespace Devices {
		@@ -69,7 +70,6 @@ __device__ inline int Cuda::getGlobalThreadIdx_z( const dim3& gridIdx )
		{
		return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
		}

		#endif


		@@ -93,8 +93,7 @@ ObjectType* Cuda::passToDevice( const ObjectType& object )
		}
		return deviceObject;
		#else
		TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
		return 0;
		throw Exceptions::CudaSupportMissing();
		#endif
		}

		@@ -110,8 +109,7 @@ ObjectType Cuda::passFromDevice( const ObjectType* object )
		TNL_CHECK_CUDA_DEVICE;
		return aux;
		#else
		TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
		return 0;
		throw Exceptions::CudaSupportMissing();
		#endif
		}

		@@ -126,7 +124,7 @@ void Cuda::passFromDevice( const ObjectType* deviceObject,
		cudaMemcpyDeviceToHost );
		TNL_CHECK_CUDA_DEVICE;
		#else
		TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
		throw Exceptions::CudaSupportMissing();
		#endif
		}

		@@ -148,7 +146,7 @@ void Cuda::freeFromDevice( ObjectType* deviceObject )
		cudaFree( ( void* ) deviceObject );
		TNL_CHECK_CUDA_DEVICE;
		#else
		TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
		throw Exceptions::CudaSupportMissing();
		#endif
		}

+2 −1

Original line number	Diff line number	Diff line
		@@ -12,8 +12,9 @@

		#include <cmath>
		#include <type_traits>
		#include <algorithm>

		#include <TNL/Devices/Cuda.h>
		#include <TNL/Devices/CudaCallable.h>

		#ifdef HAVE_CUDA
		#include <cuda.h>