diff --git a/src/TNL/Allocators/Cuda.h b/src/TNL/Allocators/Cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..74ebb840432136d9033a17a86684607098a80d86
--- /dev/null
+++ b/src/TNL/Allocators/Cuda.h
@@ -0,0 +1,99 @@
+/***************************************************************************
+                          Cuda.h  -  description
+                             -------------------
+    begin                : Apr 8, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <cstddef>
+
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+
+namespace TNL {
+namespace Allocators {
+
+/**
+ * \brief Allocator for the CUDA device memory space.
+ *
+ * The allocation is done using the `cudaMalloc` function and the deallocation
+ * is done using the `cudaFree` function.
+ */
+template< class T >
+struct Cuda
+{
+   using value_type = T;
+   using size_type = std::size_t;
+   using difference_type = std::ptrdiff_t;
+
+   Cuda() = default;
+   Cuda( const Cuda& ) = default;
+   Cuda( Cuda&& ) = default;
+
+   Cuda& operator=( const Cuda& ) = default;
+   Cuda& operator=( Cuda&& ) = default;
+
+   template< class U >
+   Cuda( const Cuda< U >& )
+   {}
+
+   template< class U >
+   Cuda( Cuda< U >&& )
+   {}
+
+   template< class U >
+   Cuda& operator=( const Cuda< U >& )
+   {
+      return *this;
+   }
+
+   template< class U >
+   Cuda& operator=( Cuda< U >&& )
+   {
+      return *this;
+   }
+
+   value_type* allocate( size_type n )
+   {
+#ifdef HAVE_CUDA
+      TNL_CHECK_CUDA_DEVICE;
+      value_type* result = nullptr;
+      if( cudaMalloc( (void**) &result, n * sizeof(value_type) ) != cudaSuccess )
+         throw Exceptions::CudaBadAlloc();
+      TNL_CHECK_CUDA_DEVICE;
+      return result;
+#else
+      throw Exceptions::CudaSupportMissing();
+#endif
+   }
+
+   void deallocate( value_type* ptr, size_type )
+   {
+#ifdef HAVE_CUDA
+      TNL_CHECK_CUDA_DEVICE;
+      cudaFree( ptr );
+      TNL_CHECK_CUDA_DEVICE;
+#else
+      throw Exceptions::CudaSupportMissing();
+#endif
+   }
+};
+
+template< class T1, class T2 >
+bool operator==( const Cuda< T1 >&, const Cuda< T2 >& )
+{
+   return true;
+}
+
+template< class T1, class T2 >
+bool operator!=( const Cuda< T1 >& lhs, const Cuda< T2 >& rhs )
+{
+   return !( lhs == rhs );
+}
+
+} // namespace Allocators
+} // namespace TNL
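
For review context, here is a minimal usage sketch of the new device allocator. It assumes a CUDA-enabled build (`HAVE_CUDA` defined); `fill_kernel` is a hypothetical kernel written only for this illustration and is not part of the patch:

```cpp
#include <TNL/Allocators/Cuda.h>

// Hypothetical kernel, used only to show that allocate() returns device memory.
__global__ void fill_kernel( float* data, int n, float value )
{
   const int i = blockIdx.x * blockDim.x + threadIdx.x;
   if( i < n )
      data[ i ] = value;
}

int main()
{
   TNL::Allocators::Cuda< float > allocator;
   const int n = 1024;
   // Returns a device pointer, or throws Exceptions::CudaBadAlloc on failure,
   // mirroring the std::bad_alloc behavior of std::allocator.
   float* data = allocator.allocate( n );
   fill_kernel<<< ( n + 255 ) / 256, 256 >>>( data, n, 3.14f );
   cudaDeviceSynchronize();
   allocator.deallocate( data, n );
}
```
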
diff --git a/src/TNL/Allocators/CudaHost.h b/src/TNL/Allocators/CudaHost.h
new file mode 100644
index 0000000000000000000000000000000000000000..284c91fe9b8dbc7abe8e3d4685ef1d7551d19a89
--- /dev/null
+++ b/src/TNL/Allocators/CudaHost.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+                          CudaHost.h  -  description
+                             -------------------
+    begin                : Apr 8, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <cstddef>
+
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+
+namespace TNL {
+namespace Allocators {
+
+/**
+ * \brief Allocator for page-locked memory on the host.
+ *
+ * The allocation is done using the `cudaMallocHost` function and the
+ * deallocation is done using the `cudaFreeHost` function.
+ */
+template< class T >
+struct CudaHost
+{
+   using value_type = T;
+   using size_type = std::size_t;
+   using difference_type = std::ptrdiff_t;
+
+   CudaHost() = default;
+   CudaHost( const CudaHost& ) = default;
+   CudaHost( CudaHost&& ) = default;
+
+   CudaHost& operator=( const CudaHost& ) = default;
+   CudaHost& operator=( CudaHost&& ) = default;
+
+   template< class U >
+   CudaHost( const CudaHost< U >& )
+   {}
+
+   template< class U >
+   CudaHost( CudaHost< U >&& )
+   {}
+
+   template< class U >
+   CudaHost& operator=( const CudaHost< U >& )
+   {
+      return *this;
+   }
+
+   template< class U >
+   CudaHost& operator=( CudaHost< U >&& )
+   {
+      return *this;
+   }
+
+   value_type* allocate( size_type n )
+   {
+#ifdef HAVE_CUDA
+      TNL_CHECK_CUDA_DEVICE;
+      value_type* result = nullptr;
+      // cudaHostAllocPortable - The memory returned by this call will be considered as
+      // pinned memory by all CUDA contexts, not just the one that performed the allocation.
+      // cudaHostAllocMapped - Maps the allocation into the CUDA address space.
+      // Also note that we assume that the cudaDevAttrCanUseHostPointerForRegisteredMem
+      // attribute is non-zero on all devices visible to the application, in which case the
+      // pointer returned by cudaMallocHost can be used directly by all devices without
+      // having to call cudaHostGetDevicePointer. See the reference:
+      // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gc00502b44e5f1bdc0b424487ebb08db0
+      if( cudaMallocHost( (void**) &result, n * sizeof(value_type), cudaHostAllocPortable | cudaHostAllocMapped ) != cudaSuccess )
+         throw Exceptions::CudaBadAlloc();
+      TNL_CHECK_CUDA_DEVICE;
+      return result;
+#else
+      throw Exceptions::CudaSupportMissing();
+#endif
+   }
+
+   void deallocate( value_type* ptr, size_type )
+   {
+#ifdef HAVE_CUDA
+      TNL_CHECK_CUDA_DEVICE;
+      cudaFreeHost( ptr );
+      TNL_CHECK_CUDA_DEVICE;
+#else
+      throw Exceptions::CudaSupportMissing();
+#endif
+   }
+};
+
+template< class T1, class T2 >
+bool operator==( const CudaHost< T1 >&, const CudaHost< T2 >& )
+{
+   return true;
+}
+
+template< class T1, class T2 >
+bool operator!=( const CudaHost< T1 >& lhs, const CudaHost< T2 >& rhs )
+{
+   return !( lhs == rhs );
+}
+
+} // namespace Allocators
+} // namespace TNL
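
The main benefit of page-locked memory is that it is eligible for truly asynchronous host-device transfers. A minimal sketch of the intended use (assuming a CUDA-enabled build; the buffer sizes and stream setup are illustrative, not part of the patch):

```cpp
#include <TNL/Allocators/CudaHost.h>

int main()
{
   TNL::Allocators::CudaHost< float > allocator;
   const int n = 1 << 20;
   // Pinned host buffer: a valid source for cudaMemcpyAsync overlap.
   float* host_buf = allocator.allocate( n );
   for( int i = 0; i < n; i++ )
      host_buf[ i ] = i;

   float* dev_buf = nullptr;
   cudaMalloc( (void**) &dev_buf, n * sizeof(float) );
   cudaStream_t stream;
   cudaStreamCreate( &stream );
   // With a pageable source this copy would silently become synchronous;
   // the pinned allocation makes the overlap actually possible.
   cudaMemcpyAsync( dev_buf, host_buf, n * sizeof(float), cudaMemcpyHostToDevice, stream );
   cudaStreamSynchronize( stream );

   cudaStreamDestroy( stream );
   cudaFree( dev_buf );
   allocator.deallocate( host_buf, n );
}
```
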
diff --git a/src/TNL/Allocators/CudaManaged.h b/src/TNL/Allocators/CudaManaged.h
new file mode 100644
index 0000000000000000000000000000000000000000..db29f86cb618bf79e4f1c0fa0ac1ad2750d476bc
--- /dev/null
+++ b/src/TNL/Allocators/CudaManaged.h
@@ -0,0 +1,101 @@
+/***************************************************************************
+                          CudaManaged.h  -  description
+                             -------------------
+    begin                : Apr 8, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <cstddef>
+
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+
+namespace TNL {
+namespace Allocators {
+
+/**
+ * \brief Allocator for the CUDA Unified Memory system.
+ *
+ * The memory allocated by this allocator will be automatically managed by the
+ * CUDA Unified Memory system. The allocation is done using the
+ * `cudaMallocManaged` function and the deallocation is done using the
+ * `cudaFree` function.
+ */
+template< class T >
+struct CudaManaged
+{
+   using value_type = T;
+   using size_type = std::size_t;
+   using difference_type = std::ptrdiff_t;
+
+   CudaManaged() = default;
+   CudaManaged( const CudaManaged& ) = default;
+   CudaManaged( CudaManaged&& ) = default;
+
+   CudaManaged& operator=( const CudaManaged& ) = default;
+   CudaManaged& operator=( CudaManaged&& ) = default;
+
+   template< class U >
+   CudaManaged( const CudaManaged< U >& )
+   {}
+
+   template< class U >
+   CudaManaged( CudaManaged< U >&& )
+   {}
+
+   template< class U >
+   CudaManaged& operator=( const CudaManaged< U >& )
+   {
+      return *this;
+   }
+
+   template< class U >
+   CudaManaged& operator=( CudaManaged< U >&& )
+   {
+      return *this;
+   }
+
+   value_type* allocate( size_type n )
+   {
+#ifdef HAVE_CUDA
+      TNL_CHECK_CUDA_DEVICE;
+      value_type* result = nullptr;
+      if( cudaMallocManaged( &result, n * sizeof(value_type) ) != cudaSuccess )
+         throw Exceptions::CudaBadAlloc();
+      TNL_CHECK_CUDA_DEVICE;
+      return result;
+#else
+      throw Exceptions::CudaSupportMissing();
+#endif
+   }
+
+   void deallocate( value_type* ptr, size_type )
+   {
+#ifdef HAVE_CUDA
+      TNL_CHECK_CUDA_DEVICE;
+      cudaFree( ptr );
+      TNL_CHECK_CUDA_DEVICE;
+#else
+      throw Exceptions::CudaSupportMissing();
+#endif
+   }
+};
+
+template< class T1, class T2 >
+bool operator==( const CudaManaged< T1 >&, const CudaManaged< T2 >& )
+{
+   return true;
+}
+
+template< class T1, class T2 >
+bool operator!=( const CudaManaged< T1 >& lhs, const CudaManaged< T2 >& rhs )
+{
+   return !( lhs == rhs );
+}
+
+} // namespace Allocators
+} // namespace TNL
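
What Unified Memory buys over the plain `Cuda` allocator is a single pointer valid on both host and device, with page migration handled by the driver. A minimal sketch (CUDA-enabled build assumed; `scale_kernel` is a hypothetical kernel for this illustration only):

```cpp
#include <TNL/Allocators/CudaManaged.h>

// Hypothetical kernel, used only to show device-side access to managed memory.
__global__ void scale_kernel( double* data, int n, double factor )
{
   const int i = blockIdx.x * blockDim.x + threadIdx.x;
   if( i < n )
      data[ i ] *= factor;
}

int main()
{
   TNL::Allocators::CudaManaged< double > allocator;
   const int n = 256;
   double* data = allocator.allocate( n );
   for( int i = 0; i < n; i++ )
      data[ i ] = i;                         // touched on the host
   scale_kernel<<< 1, n >>>( data, n, 2.0 );  // same pointer, touched on the device
   cudaDeviceSynchronize();                   // required before the host reads again
   allocator.deallocate( data, n );
}
```
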
diff --git a/src/TNL/Allocators/Default.h b/src/TNL/Allocators/Default.h
new file mode 100644
index 0000000000000000000000000000000000000000..6906a905c3a82d3e2400c4ba6a767848bf1be061
--- /dev/null
+++ b/src/TNL/Allocators/Default.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+                          Default.h  -  description
+                             -------------------
+    begin                : Jul 2, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <TNL/Allocators/Host.h>
+#include <TNL/Allocators/Cuda.h>
+#include <TNL/Allocators/MIC.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/MIC.h>
+
+namespace TNL {
+namespace Allocators {
+
+/**
+ * \brief A trait-like class used for the selection of the default allocator
+ * for a given device.
+ */
+template< typename Device >
+struct Default;
+
+//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Host.
+template<>
+struct Default< Devices::Host >
+{
+   template< typename T >
+   using Allocator = Allocators::Host< T >;
+};
+
+//! Sets \ref Allocators::Cuda as the default allocator for \ref Devices::Cuda.
+template<>
+struct Default< Devices::Cuda >
+{
+   template< typename T >
+   using Allocator = Allocators::Cuda< T >;
+};
+
+//! Sets \ref Allocators::MIC as the default allocator for \ref Devices::MIC.
+template<>
+struct Default< Devices::MIC >
+{
+   template< typename T >
+   using Allocator = Allocators::MIC< T >;
+};
+
+} // namespace Allocators
+} // namespace TNL
diff --git a/src/TNL/Allocators/Host.h b/src/TNL/Allocators/Host.h
new file mode 100644
index 0000000000000000000000000000000000000000..65c0b18c11c11eb97bbb91de9619ecaa6d976b18
--- /dev/null
+++ b/src/TNL/Allocators/Host.h
@@ -0,0 +1,35 @@
+/***************************************************************************
+                          Host.h  -  description
+                             -------------------
+    begin                : Apr 8, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <memory>
+
+namespace TNL {
+
+/**
+ * \brief Namespace for TNL allocators.
+ *
+ * All TNL allocators must satisfy the requirements imposed by the
+ * [Allocator concept](https://en.cppreference.com/w/cpp/named_req/Allocator)
+ * from STL.
+ */
+namespace Allocators {
+
+/**
+ * \brief Allocator for the host memory space -- alias for \ref std::allocator.
+ */
+template< class T >
+using Host = std::allocator< T >;
+
+} // namespace Allocators
+} // namespace TNL
diff --git a/src/TNL/Allocators/MIC.h b/src/TNL/Allocators/MIC.h
new file mode 100644
index 0000000000000000000000000000000000000000..c3599f449cd85f9f83c0ef0e5974bb015d04a6ef
--- /dev/null
+++ b/src/TNL/Allocators/MIC.h
@@ -0,0 +1,100 @@
+/***************************************************************************
+                          MIC.h  -  description
+                             -------------------
+    begin                : Jul 2, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <cstddef>
+#include <cstdlib>
+
+#include <TNL/Devices/MIC.h>
+#include <TNL/Exceptions/MICSupportMissing.h>
+
+namespace TNL {
+namespace Allocators {
+
+/**
+ * \brief Allocator for the MIC device memory space.
+ */
+template< class T >
+struct MIC
+{
+   using value_type = T;
+   using size_type = std::size_t;
+   using difference_type = std::ptrdiff_t;
+
+   MIC() = default;
+   MIC( const MIC& ) = default;
+   MIC( MIC&& ) = default;
+
+   MIC& operator=( const MIC& ) = default;
+   MIC& operator=( MIC&& ) = default;
+
+   template< class U >
+   MIC( const MIC< U >& )
+   {}
+
+   template< class U >
+   MIC( MIC< U >&& )
+   {}
+
+   template< class U >
+   MIC& operator=( const MIC< U >& )
+   {
+      return *this;
+   }
+
+   template< class U >
+   MIC& operator=( MIC< U >&& )
+   {
+      return *this;
+   }
+
+   value_type* allocate( size_type size )
+   {
+#ifdef HAVE_MIC
+      Devices::MICHider< void > hide_ptr;
+      #pragma offload target(mic) out(hide_ptr) in(size)
+      {
+         hide_ptr.pointer = malloc( size * sizeof(value_type) );
+      }
+      // hide_ptr.pointer is a void*, so an explicit cast is required in C++
+      return static_cast< value_type* >( hide_ptr.pointer );
+#else
+      throw Exceptions::MICSupportMissing();
+#endif
+   }
+
+   void deallocate( value_type* ptr, size_type )
+   {
+#ifdef HAVE_MIC
+      Devices::MICHider< void > hide_ptr;
+      hide_ptr.pointer = ptr;
+      #pragma offload target(mic) in(hide_ptr)
+      {
+         free( hide_ptr.pointer );
+      }
+#else
+      throw Exceptions::MICSupportMissing();
+#endif
+   }
+};
+
+template< class T1, class T2 >
+bool operator==( const MIC< T1 >&, const MIC< T2 >& )
+{
+   return true;
+}
+
+template< class T1, class T2 >
+bool operator!=( const MIC< T1 >& lhs, const MIC< T2 >& rhs )
+{
+   return !( lhs == rhs );
+}
+
+} // namespace Allocators
+} // namespace TNL
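
The `Default` trait is what allocator-aware containers are expected to plug into: they take the device as a template parameter and resolve the allocator through the trait. A sketch of the intended pattern, where `ArraySketch` is a hypothetical skeleton written for this illustration (not TNL's actual `Array`):

```cpp
#include <cstddef>
#include <TNL/Allocators/Default.h>
#include <TNL/Devices/Host.h>

// Hypothetical container skeleton: the allocator defaults to whatever the
// Default trait selects for the given device.
template< typename Value,
          typename Device,
          typename Allocator = typename TNL::Allocators::Default< Device >::template Allocator< Value > >
struct ArraySketch
{
   Allocator allocator;
   Value* data = nullptr;
   std::size_t size = 0;

   void setSize( std::size_t newSize )
   {
      if( data )
         allocator.deallocate( data, size );
      size = newSize;
      data = allocator.allocate( size );
   }

   ~ArraySketch()
   {
      if( data )
         allocator.deallocate( data, size );
   }
};

int main()
{
   // Resolves to Allocators::Host< int >, i.e. std::allocator< int >;
   // with Devices::Cuda it would resolve to Allocators::Cuda< int >.
   ArraySketch< int, TNL::Devices::Host > a;
   a.setSize( 100 );
}
```

A user can still override the default by passing an explicit allocator type, e.g. `ArraySketch< int, TNL::Devices::Host, TNL::Allocators::CudaHost< int > >` for pinned host storage, which is the main design payoff of routing the choice through a trait rather than hard-coding it per device.
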