diff --git a/Documentation/Examples/Pointers/DevicePointerExample.cpp b/Documentation/Examples/Pointers/DevicePointerExample.cpp index 144ae98b0e57a4aab79eb6d3e4aa20135dc12ca7..897b92962ab8e9dd65aa973cd207708287327fed 100644 --- a/Documentation/Examples/Pointers/DevicePointerExample.cpp +++ b/Documentation/Examples/Pointers/DevicePointerExample.cpp @@ -15,6 +15,7 @@ struct Tuple Pointers::DevicePointer< ArrayCuda > a1, a2; }; +#ifdef HAVE_CUDA __global__ void printTuple( const Tuple t ) { printf( "Tuple size is: %d\n", t.a1->getSize() ); @@ -24,6 +25,7 @@ __global__ void printTuple( const Tuple t ) printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] ); } } +#endif int main( int argc, char* argv[] ) { diff --git a/Documentation/Examples/Pointers/SharedPointerExample.cpp b/Documentation/Examples/Pointers/SharedPointerExample.cpp index 287aae8e8dd2f9faf3c2ebeb86670f5e77f489a0..be149518cc04e1c230397015d6733970527293d9 100644 --- a/Documentation/Examples/Pointers/SharedPointerExample.cpp +++ b/Documentation/Examples/Pointers/SharedPointerExample.cpp @@ -21,6 +21,7 @@ struct Tuple Pointers::SharedPointer< ArrayCuda > a1, a2; }; +#ifdef HAVE_CUDA __global__ void printTuple( const Tuple t ) { printf( "Tuple size is: %d\n", t.a1->getSize() ); @@ -30,6 +31,7 @@ __global__ void printTuple( const Tuple t ) printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] ); } } +#endif int main( int argc, char* argv[] ) { diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h index 93f63f807c5038795c53cc0c5182571ab2d8a9c4..293434ccd2ea589b48f574e994caabfcfc7d99fd 100644 --- a/src/TNL/Pointers/SharedPointer.h +++ b/src/TNL/Pointers/SharedPointer.h @@ -22,6 +22,32 @@ namespace TNL { namespace Pointers { +/** + * \brief Cross-device shared smart pointer. + * + * This smart pointer is inspired by std::shared_ptr from STL library. It means + * that the object owned by the smart pointer can be shared with other + * smart pointers. One can make a copy of this smart pointer. In addition, + * the smart pointer is able to work across different devices which means that the + * object owned by the smart pointer is mirrored on both host and device. + * + * **NOTE: When using smart pointers to pass objects on GPU, one must call + * \ref Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >() + * before calling a CUDA kernel working with smart pointers.** + * + * \tparam Object is a type of object to be owned by the pointer. + * \tparam Device is device where the object is to be allocated. The object is + * always allocated on the host system as well for easier object manipulation. + * + * See also \ref UniquePointer and \ref DevicePointer. + * + * See also \ref SharedPointer< Object, Devices::Host > and \ref SharedPointer< Object, Devices::Cuda >. + * + * \par Example + * \include Pointers/SharedPointerExample.cpp + * \par Output + * \include SharedPointerExample.out + */ template< typename Object, typename Device = typename Object::DeviceType > class SharedPointer diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h index 510f172d8ef2e77c2090ce38bd7a2532bb1f6a59..81951a5e90ff6ce0d32c9b6b9ccffd12ca55dcfd 100644 --- a/src/TNL/Pointers/SharedPointerCuda.h +++ b/src/TNL/Pointers/SharedPointerCuda.h @@ -28,15 +28,25 @@ namespace Pointers { //#define HAVE_CUDA_UNIFIED_MEMORY -#ifdef HAVE_CUDA_UNIFIED_MEMORY +#if ! defined HAVE_CUDA_UNIFIED_MEMORY + +/** + * \brief Specialization of the UniquePointer for the CUDA device. + * + * \tparam Object is a type of object to be owned by the pointer. + */ template< typename Object > class SharedPointer< Object, Devices::Cuda > : public SmartPointer { private: - // Convenient template alias for controlling the selection of copy- and - // move-constructors and assignment operators using SFINAE. - // The type Object_ is "enabled" iff Object_ and Object are not the same, - // but after removing const and volatile qualifiers they are the same. + /** + * \typedef Enabler + * + * Convenient template alias for controlling the selection of copy- and + * move-constructors and assignment operators using SFINAE. + * The type Object_ is "enabled" iff Object_ and Object are not the same, + * but after removing const and volatile qualifiers they are the same. + */ template< typename Object_ > using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value && std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >; @@ -47,71 +57,129 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer public: + /** + * \typedef ObjectType is the type of object owned by the pointer. + */ using ObjectType = Object; - using DeviceType = Devices::Cuda; + /** + * \typedef DeviceType is the type of device where the object is to be + * mirrored. + */ + using DeviceType = Devices::Cuda; + + /** + * \brief Constructor of empty pointer. + */ SharedPointer( std::nullptr_t ) - : pd( nullptr ) + : pd( nullptr ), + cuda_pointer( nullptr ) {} + /** + * \brief Constructor with parameters of the Object constructor. + * + * \tparam Args is variadic template type of arguments of the Object constructor. + * \tparam args are arguments passed to the Object constructor. + */ template< typename... Args > explicit SharedPointer( Args... args ) - : pd( nullptr ) + : pd( nullptr ), + cuda_pointer( nullptr ) { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl; -#endif this->allocate( args... ); } - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( const SharedPointer& pointer ) - : pd( (PointerData*) pointer.pd ) + /** + * \brief Copy constructor. + * + * \param pointer is the source shared pointer. + */ + SharedPointer( const SharedPointer& pointer ) // this is needed only to avoid the default compiler-generated constructor + : pd( (PointerData*) pointer.pd ), + cuda_pointer( pointer.cuda_pointer ) { this->pd->counter += 1; } - // conditional constructor for non-const -> const data + /** + * \brief Copy constructor. + * + * This is specialization for compatible object types. + * + * See \ref Enabler. + * + * \param pointer is the source shared pointer. + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) - : pd( (PointerData*) pointer.pd ) + SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) // conditional constructor for non-const -> const data + : pd( (PointerData*) pointer.pd ), + cuda_pointer( pointer.cuda_pointer ) { this->pd->counter += 1; } - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( SharedPointer&& pointer ) - : pd( (PointerData*) pointer.pd ) + /** + * \brief Move constructor. + * + * \param pointer is the source shared pointer. + */ + SharedPointer( SharedPointer&& pointer ) // this is needed only to avoid the default compiler-generated constructor + : pd( (PointerData*) pointer.pd ), + cuda_pointer( pointer.cuda_pointer ) { pointer.pd = nullptr; + pointer.cuda_pointer = nullptr; } - // conditional constructor for non-const -> const data + /** + * \brief Move constructor. + * + * This is specialization for compatible object types. + * + * See \ref Enabler. + * + * \param pointer is the source shared pointer. + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) - : pd( (PointerData*) pointer.pd ) + SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) // conditional constructor for non-const -> const data + : pd( (PointerData*) pointer.pd ), + cuda_pointer( pointer.cuda_pointer ) { pointer.pd = nullptr; + pointer.cuda_pointer = nullptr; } + /** + * \brief Create new object based in given constructor parameters. + * + * \tparam Args is variadic template type of arguments to be passed to the + * object constructor. + * \param args are arguments to be passed to the object constructor. + * \return true if recreation was successful, false otherwise. + */ template< typename... Args > bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif - if( ! this->counter ) + if( ! this->pd ) return this->allocate( args... ); - if( *this->pd->counter == 1 ) + if( this->pd->counter == 1 ) { /**** * The object is not shared -> recreate it in-place, without reallocation */ - this->pd->data.~ObjectType(); - new ( this->pd->data ) ObjectType( args... ); + this->pd->data.~Object(); + new ( &this->pd->data ) Object( args... ); +#ifdef HAVE_CUDA + cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice ); +#endif + this->set_last_sync_state(); return true; } @@ -121,167 +189,380 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer return this->allocate( args... ); } + /** + * \brief Arrow operator for accessing the object owned by constant smart pointer. + * + * \return constant pointer to the object owned by this smart pointer. It + * returns pointer to object image on the CUDA device if it is called from CUDA + * kernel and pointer to host image otherwise. + */ + __cuda_callable__ const Object* operator->() const { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); +#ifdef __CUDA_ARCH__ + return this->cuda_pointer; +#else return &this->pd->data; +#endif } + /** + * \brief Arrow operator for accessing the object owned by non-constant smart pointer. + * + * \return pointer to the object owned by this smart pointer. It + * returns pointer to object image on the CUDA device if it is called from CUDA + * kernel and pointer to host image otherwise. + */ + __cuda_callable__ Object* operator->() { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); +#ifdef __CUDA_ARCH__ + return this->cuda_pointer; +#else + this->pd->maybe_modified = true; return &this->pd->data; +#endif } + /** + * \brief Dereferencing operator for accessing the object owned by constant smart pointer. + * + * \return constant reference to the object owned by this smart pointer. It + * returns reference to object image on the CUDA device if it is called from CUDA + * kernel and reference to host image otherwise. + */ + __cuda_callable__ const Object& operator *() const { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); +#ifdef __CUDA_ARCH__ + return *( this->cuda_pointer ); +#else return this->pd->data; +#endif } + /** + * \brief Dereferencing operator for accessing the object owned by non-constant smart pointer. + * + * \return reference to the object owned by this smart pointer. It + * returns reference to object image on the CUDA device if it is called from CUDA + * kernel and reference to host image otherwise. + */ + __cuda_callable__ Object& operator *() { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); +#ifdef __CUDA_ARCH__ + return *( this->cuda_pointer ); +#else + this->pd->maybe_modified = true; return this->pd->data; +#endif } + /** + * \brief Conversion to boolean type. + * + * \return Returns true if the pointer is not empty, false otherwise. + */ __cuda_callable__ operator bool() const { return this->pd; } + /** + * \brief Negation operator. + * + * \return Returns false if the pointer is not empty, true otherwise. + */ __cuda_callable__ bool operator!() const { return ! this->pd; } + /** + * \brief Constant object reference getter. + * + * No synchronization of this pointer will be performed due to calling + * this method. + * + * \tparam Device says what image of the object one want to dereference. It + * can be either \ref DeviceType or Devices::Host. + * \return constant reference to the object image on given device. + */ template< typename Device = Devices::Host > __cuda_callable__ const Object& getData() const { + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." ); TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return this->pd->data; + TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" ); + if( std::is_same< Device, Devices::Host >::value ) + return this->pd->data; + if( std::is_same< Device, Devices::Cuda >::value ) + return *( this->cuda_pointer ); } + /** + * \brief Non-constant object reference getter. + * + * After calling this method, the object owned by the pointer might need + * to be synchronized. One should not forget to call + * \ref Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >() + * before calling CUDA kernel using object from this smart pointer. + * + * \tparam Device says what image of the object one want to dereference. It + * can be either \ref DeviceType or Devices::Host. + * \return constant reference to the object image on given device. + */ template< typename Device = Devices::Host > __cuda_callable__ Object& modifyData() { + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." ); TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return this->pd->data; + TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" ); + if( std::is_same< Device, Devices::Host >::value ) + { + this->pd->maybe_modified = true; + return this->pd->data; + } + if( std::is_same< Device, Devices::Cuda >::value ) + return *( this->cuda_pointer ); } - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( const SharedPointer& ptr ) + /** + * \brief Assignment operator. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ + const SharedPointer& operator=( const SharedPointer& ptr ) // this is needed only to avoid the default compiler-generated operator { this->free(); this->pd = (PointerData*) ptr.pd; - if( this->pd != nullptr ) + this->cuda_pointer = ptr.cuda_pointer; + if( this->pd != nullptr ) this->pd->counter += 1; +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; +#endif return *this; } - // conditional operator for non-const -> const data + /** + * \brief Assignment operator for compatible object types. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * See \ref Enabler. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr ) + const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr ) // conditional operator for non-const -> const data { this->free(); this->pd = (PointerData*) ptr.pd; + this->cuda_pointer = ptr.cuda_pointer; if( this->pd != nullptr ) this->pd->counter += 1; +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; +#endif return *this; } - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( SharedPointer&& ptr ) + /** + * \brief Move operator. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ + const SharedPointer& operator=( SharedPointer&& ptr ) // this is needed only to avoid the default compiler-generated operator { this->free(); this->pd = (PointerData*) ptr.pd; + this->cuda_pointer = ptr.cuda_pointer; ptr.pd = nullptr; + ptr.cuda_pointer = nullptr; +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; +#endif return *this; } - // conditional operator for non-const -> const data + /** + * \brief Move operator. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * See \ref Enabler. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr ) + const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr ) // conditional operator for non-const -> const data { this->free(); this->pd = (PointerData*) ptr.pd; + this->cuda_pointer = ptr.cuda_pointer; ptr.pd = nullptr; + ptr.cuda_pointer = nullptr; +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; +#endif return *this; } + /** + * \brief Cross-device pointer synchronization. + * + * For the smart pointers in the host, this method does nothing. + * + * \return true. + */ bool synchronize() { + if( ! this->pd ) + return true; +#ifdef HAVE_CUDA + if( this->modified() ) + { +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; + std::cerr << " ( " << sizeof( Object ) << " bytes, CUDA adress " << this->cuda_pointer << " )" << std::endl; +#endif + TNL_ASSERT( this->cuda_pointer, ); + cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice ); + TNL_CHECK_CUDA_DEVICE; + this->set_last_sync_state(); + return true; + } return true; +#else + return false; +#endif } + /** + * \brief Reset the pointer to empty state. + */ void clear() { this->free(); } + /** + * \brief Swap the owned object with another pointer. + * + * \param ptr2 the other shared pointer for swapping. + */ void swap( SharedPointer& ptr2 ) { std::swap( this->pd, ptr2.pd ); + std::swap( this->cuda_pointer, ptr2.cuda_pointer ); } + /** + * \brief Destructor. + */ ~SharedPointer() { this->free(); + getSmartPointersRegister< DeviceType >().remove( this ); } - protected: struct PointerData { Object data; + char data_image[ sizeof(Object) ]; int counter; + bool maybe_modified; template< typename... Args > explicit PointerData( Args... args ) : data( args... ), - counter( 1 ) + counter( 1 ), + maybe_modified( false ) {} }; template< typename... Args > bool allocate( Args... args ) { -#ifdef HAVE_CUDA - if( cudaMallocManaged( ( void** ) &this->pd, sizeof( PointerData ) != cudaSuccess ) ) - return false; - new ( this->pd ) PointerData( args... ); - return true; -#else - return false; + this->pd = new PointerData( args... ); + // pass to device + this->cuda_pointer = Cuda::passToDevice( this->pd->data ); + // set last-sync state + this->set_last_sync_state(); +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Created shared pointer to " << getType< ObjectType >() << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; #endif + getSmartPointersRegister< DeviceType >().insert( this ); + return true; + } + + void set_last_sync_state() + { + TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); + std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ); + this->pd->maybe_modified = false; + } + + bool modified() + { + TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); + // optimization: skip bitwise comparison if we're sure that the data is the same + if( ! this->pd->maybe_modified ) + return false; + return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0; } void free() { if( this->pd ) { +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << getType< ObjectType >() << std::endl; +#endif if( ! --this->pd->counter ) { -#ifdef HAVE_CUDA - cudaFree( this->pd ); -#endif + delete this->pd; this->pd = nullptr; + if( this->cuda_pointer ) + Cuda::freeFromDevice( this->cuda_pointer ); +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "...deleted data." << std::endl; +#endif } } } PointerData* pd; -}; - -#else // HAVE_CUDA_UNIFIED_MEMORY + // cuda_pointer can't be part of PointerData structure, since we would be + // unable to dereference this-pd on the device + Object* cuda_pointer; +}; + +#else +// Implementation with CUDA unified memory. It is very slow, we keep it only for experimental reasons. template< typename Object > class SharedPointer< Object, Devices::Cuda > : public SmartPointer { @@ -304,22 +585,22 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer using DeviceType = Devices::Cuda; SharedPointer( std::nullptr_t ) - : pd( nullptr ), - cuda_pointer( nullptr ) + : pd( nullptr ) {} template< typename... Args > explicit SharedPointer( Args... args ) - : pd( nullptr ), - cuda_pointer( nullptr ) + : pd( nullptr ) { +#ifdef TNL_DEBUG_SHARED_POINTERS + std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl; +#endif this->allocate( args... ); } // this is needed only to avoid the default compiler-generated constructor SharedPointer( const SharedPointer& pointer ) - : pd( (PointerData*) pointer.pd ), - cuda_pointer( pointer.cuda_pointer ) + : pd( (PointerData*) pointer.pd ) { this->pd->counter += 1; } @@ -328,30 +609,25 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer template< typename Object_, typename = typename Enabler< Object_ >::type > SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) - : pd( (PointerData*) pointer.pd ), - cuda_pointer( pointer.cuda_pointer ) + : pd( (PointerData*) pointer.pd ) { this->pd->counter += 1; } // this is needed only to avoid the default compiler-generated constructor SharedPointer( SharedPointer&& pointer ) - : pd( (PointerData*) pointer.pd ), - cuda_pointer( pointer.cuda_pointer ) + : pd( (PointerData*) pointer.pd ) { pointer.pd = nullptr; - pointer.cuda_pointer = nullptr; } // conditional constructor for non-const -> const data template< typename Object_, typename = typename Enabler< Object_ >::type > SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) - : pd( (PointerData*) pointer.pd ), - cuda_pointer( pointer.cuda_pointer ) + : pd( (PointerData*) pointer.pd ) { pointer.pd = nullptr; - pointer.cuda_pointer = nullptr; } template< typename... Args > @@ -360,20 +636,16 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer #ifdef TNL_DEBUG_SHARED_POINTERS std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif - if( ! this->pd ) + if( ! this->counter ) return this->allocate( args... ); - if( this->pd->counter == 1 ) + if( *this->pd->counter == 1 ) { /**** * The object is not shared -> recreate it in-place, without reallocation */ - this->pd->data.~Object(); - new ( &this->pd->data ) Object( args... ); -#ifdef HAVE_CUDA - cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice ); -#endif - this->set_last_sync_state(); + this->pd->data.~ObjectType(); + new ( this->pd->data ) ObjectType( args... ); return true; } @@ -383,50 +655,28 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer return this->allocate( args... ); } - __cuda_callable__ const Object* operator->() const { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); -#ifdef __CUDA_ARCH__ - return this->cuda_pointer; -#else return &this->pd->data; -#endif } - __cuda_callable__ Object* operator->() { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); -#ifdef __CUDA_ARCH__ - return this->cuda_pointer; -#else - this->pd->maybe_modified = true; return &this->pd->data; -#endif } - __cuda_callable__ const Object& operator *() const { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); -#ifdef __CUDA_ARCH__ - return *( this->cuda_pointer ); -#else return this->pd->data; -#endif } - __cuda_callable__ Object& operator *() { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); -#ifdef __CUDA_ARCH__ - return *( this->cuda_pointer ); -#else - this->pd->maybe_modified = true; return this->pd->data; -#endif } __cuda_callable__ @@ -445,29 +695,16 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer __cuda_callable__ const Object& getData() const { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." ); TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - return this->pd->data; - if( std::is_same< Device, Devices::Cuda >::value ) - return *( this->cuda_pointer ); + return this->pd->data; } template< typename Device = Devices::Host > __cuda_callable__ Object& modifyData() { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." ); TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return this->pd->data; - } - if( std::is_same< Device, Devices::Cuda >::value ) - return *( this->cuda_pointer ); + return this->pd->data; } // this is needed only to avoid the default compiler-generated operator @@ -475,12 +712,8 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer { this->free(); this->pd = (PointerData*) ptr.pd; - this->cuda_pointer = ptr.cuda_pointer; - if( this->pd != nullptr ) + if( this->pd != nullptr ) this->pd->counter += 1; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; -#endif return *this; } @@ -491,12 +724,8 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer { this->free(); this->pd = (PointerData*) ptr.pd; - this->cuda_pointer = ptr.cuda_pointer; if( this->pd != nullptr ) this->pd->counter += 1; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; -#endif return *this; } @@ -505,12 +734,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer { this->free(); this->pd = (PointerData*) ptr.pd; - this->cuda_pointer = ptr.cuda_pointer; ptr.pd = nullptr; - ptr.cuda_pointer = nullptr; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; -#endif return *this; } @@ -521,36 +745,13 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer { this->free(); this->pd = (PointerData*) ptr.pd; - this->cuda_pointer = ptr.cuda_pointer; ptr.pd = nullptr; - ptr.cuda_pointer = nullptr; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; -#endif return *this; } bool synchronize() { - if( ! this->pd ) - return true; -#ifdef HAVE_CUDA - if( this->modified() ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; - std::cerr << " ( " << sizeof( Object ) << " bytes, CUDA adress " << this->cuda_pointer << " )" << std::endl; -#endif - TNL_ASSERT( this->cuda_pointer, ); - cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice ); - TNL_CHECK_CUDA_DEVICE; - this->set_last_sync_state(); - return true; - } return true; -#else - return false; -#endif } void clear() @@ -561,90 +762,59 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer void swap( SharedPointer& ptr2 ) { std::swap( this->pd, ptr2.pd ); - std::swap( this->cuda_pointer, ptr2.cuda_pointer ); } ~SharedPointer() { this->free(); - getSmartPointersRegister< DeviceType >().remove( this ); } + protected: struct PointerData { Object data; - char data_image[ sizeof(Object) ]; int counter; - bool maybe_modified; template< typename... Args > explicit PointerData( Args... args ) : data( args... ), - counter( 1 ), - maybe_modified( false ) + counter( 1 ) {} }; template< typename... Args > bool allocate( Args... args ) { - this->pd = new PointerData( args... ); - // pass to device - this->cuda_pointer = Cuda::passToDevice( this->pd->data ); - // set last-sync state - this->set_last_sync_state(); -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Created shared pointer to " << getType< ObjectType >() << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; -#endif - getSmartPointersRegister< DeviceType >().insert( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) +#ifdef HAVE_CUDA + if( cudaMallocManaged( ( void** ) &this->pd, sizeof( PointerData ) != cudaSuccess ) ) return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0; + new ( this->pd ) PointerData( args... ); + return true; +#else + return false; +#endif } void free() { if( this->pd ) { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << getType< ObjectType >() << std::endl; -#endif if( ! --this->pd->counter ) { - delete this->pd; - this->pd = nullptr; - if( this->cuda_pointer ) - Cuda::freeFromDevice( this->cuda_pointer ); -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "...deleted data." << std::endl; +#ifdef HAVE_CUDA + cudaFree( this->pd ); #endif + this->pd = nullptr; } } } PointerData* pd; - - // cuda_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device - Object* cuda_pointer; }; -#endif // HAVE_CUDA_UNIFIED_MEMORY + +#endif // ! HAVE_CUDA_UNIFIED_MEMORY } // namespace Pointers } // namespace TNL diff --git a/src/TNL/Pointers/SharedPointerHost.h b/src/TNL/Pointers/SharedPointerHost.h index 39a6d4da4a2b8ab8b964110173d1716fade1ac71..9e71205f5269d59559cec71106b6c798195fdc15 100644 --- a/src/TNL/Pointers/SharedPointerHost.h +++ b/src/TNL/Pointers/SharedPointerHost.h @@ -24,14 +24,23 @@ namespace TNL { namespace Pointers { +/** + * \brief Specialization of the UniquePointer for the host system. + * + * \tparam Object is a type of object to be owned by the pointer. + */ template< typename Object > class SharedPointer< Object, Devices::Host > : public SmartPointer { private: - // Convenient template alias for controlling the selection of copy- and - // move-constructors and assignment operators using SFINAE. - // The type Object_ is "enabled" iff Object_ and Object are not the same, - // but after removing const and volatile qualifiers they are the same. + + /** + * \typedef Enabler + * Convenient template alias for controlling the selection of copy- and + * move-constructors and assignment operators using SFINAE. + * The type Object_ is "enabled" iff Object_ and Object are not the same, + * but after removing const and volatile qualifiers they are the same. + */ template< typename Object_ > using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value && std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >; @@ -42,13 +51,30 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer public: + /** + * \typedef ObjectType is the type of object owned by the pointer. + */ using ObjectType = Object; - using DeviceType = Devices::Host; + /** + * \typedef DeviceType is the type of device where the object is to be + * mirrored. + */ + using DeviceType = Devices::Host; + + /** + * \brief Constructor of empty pointer. + */ SharedPointer( std::nullptr_t ) : pd( nullptr ) {} + /** + * \brief Constructor with parameters of the Object constructor. + * + * \tparam Args is variadic template type of arguments of the Object constructor. + * \tparam args are arguments passed to the Object constructor. + */ template< typename... Args > explicit SharedPointer( Args... args ) : pd( nullptr ) @@ -59,38 +85,70 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer this->allocate( args... ); } - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( const SharedPointer& pointer ) + /** + * \brief Copy constructor. + * + * \param pointer is the source shared pointer. + */ + SharedPointer( const SharedPointer& pointer ) // this is needed only to avoid the default compiler-generated constructor : pd( (PointerData*) pointer.pd ) { this->pd->counter += 1; } - // conditional constructor for non-const -> const data + /** + * \brief Copy constructor. + * + * This is specialization for compatible object types. + * + * See \ref Enabler. + * + * \param pointer is the source shared pointer. + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) + SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) // conditional constructor for non-const -> const data : pd( (PointerData*) pointer.pd ) { this->pd->counter += 1; } - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( SharedPointer&& pointer ) + /** + * \brief Move constructor. + * + * \param pointer is the source shared pointer. + */ + SharedPointer( SharedPointer&& pointer ) // this is needed only to avoid the default compiler-generated constructor : pd( (PointerData*) pointer.pd ) { pointer.pd = nullptr; } - // conditional constructor for non-const -> const data + /** + * \brief Move constructor. + * + * This is specialization for compatible object types. + * + * See \ref Enabler. + * + * \param pointer is the source shared pointer. + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) + SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) // conditional constructor for non-const -> const data : pd( (PointerData*) pointer.pd ) { pointer.pd = nullptr; } + /** + * \brief Create new object based in given constructor parameters. + * + * \tparam Args is variadic template type of arguments to be passed to the + * object constructor. + * \param args are arguments to be passed to the object constructor. + * \return true if recreation was successful, false otherwise. + */ template< typename... Args > bool recreate( Args... args ) { @@ -116,42 +174,80 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return this->allocate( args... ); } + /** + * \brief Arrow operator for accessing the object owned by constant smart pointer. + * + * \return constant pointer to the object owned by this smart pointer. + */ const Object* operator->() const { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); return &this->pd->data; } + /** + * \brief Arrow operator for accessing the object owned by non-constant smart pointer. + * + * \return pointer to the object owned by this smart pointer. + */ Object* operator->() { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); return &this->pd->data; } + /** + * \brief Dereferencing operator for accessing the object owned by constant smart pointer. + * + * \return constant reference to the object owned by this smart pointer. + */ const Object& operator *() const { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); return this->pd->data; } + /** + * \brief Dereferencing operator for accessing the object owned by non-constant smart pointer. + * + * \return reference to the object owned by this smart pointer. + */ Object& operator *() { TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); return this->pd->data; } - __cuda_callable__ + /** + * \brief Conversion to boolean type. + * + * \return Returns true if the pointer is not empty, false otherwise. + */ operator bool() const { return this->pd; } - __cuda_callable__ + /** + * \brief Negation operator. + * + * \return Returns false if the pointer is not empty, true otherwise. + */ bool operator!() const { return ! this->pd; } + /** + * \brief Constant object reference getter. + * + * No synchronization of this pointer will be performed due to calling + * this method. + * + * \tparam Device says what image of the object one want to dereference. It + * can be either \ref DeviceType or Devices::Host. + * \return constant reference to the object image on given device. + */ template< typename Device = Devices::Host > __cuda_callable__ const Object& getData() const @@ -160,6 +256,16 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return this->pd->data; } + /** + * \brief Non-constant object reference getter. + * + * No synchronization of this pointer will be performed due to calling + * this method. + * + * \tparam Device says what image of the object one want to dereference. It + * can be either \ref DeviceType or Devices::Host. + * \return constant reference to the object image on given device. + */ template< typename Device = Devices::Host > __cuda_callable__ Object& modifyData() @@ -168,8 +274,15 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return this->pd->data; } - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( const SharedPointer& ptr ) + /** + * \brief Assignment operator. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ + const SharedPointer& operator=( const SharedPointer& ptr ) // this is needed only to avoid the default compiler-generated operator { this->free(); this->pd = (PointerData*) ptr.pd; @@ -178,10 +291,19 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return *this; } - // conditional operator for non-const -> const data + /** + * \brief Assignment operator for compatible object types. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * See \ref Enabler. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr ) + const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr ) // conditional operator for non-const -> const data { this->free(); this->pd = (PointerData*) ptr.pd; @@ -190,8 +312,15 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return *this; } - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( SharedPointer&& ptr ) + /** + * \brief Move operator. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ + const SharedPointer& operator=( SharedPointer&& ptr ) // this is needed only to avoid the default compiler-generated operator { this->free(); this->pd = (PointerData*) ptr.pd; @@ -199,10 +328,19 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return *this; } - // conditional operator for non-const -> const data + /** + * \brief Move operator. + * + * It assigns object owned by the pointer \ref ptr to \ref this pointer. + * + * See \ref Enabler. + * + * \param ptr input pointer + * \return constant reference to \ref this + */ template< typename Object_, typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr ) + const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr ) // conditional operator for non-const -> const data { this->free(); this->pd = (PointerData*) ptr.pd; @@ -210,21 +348,39 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer return *this; } + /** + * \brief Cross-device pointer synchronization. + * + * For the smart pointers in the host, this method does nothing. + * + * \return true. + */ bool synchronize() { return true; } + /** + * \brief Reset the pointer to empty state. + */ void clear() { this->free(); } + /** + * \brief Swap the owned object with another pointer. + * + * \param ptr2 the other shared pointer for swapping. + */ void swap( SharedPointer& ptr2 ) { std::swap( this->pd, ptr2.pd ); } + /** + * \brief Destructor. + */ ~SharedPointer() { this->free(); diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index baa93e58928e6b595e84eaacdc6f9126b43dedbd..76f06f5237d0dcad9db7615c0b2d8efbca2efef1 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -57,7 +57,7 @@ class UniquePointer }; /** - * \brief Specialization of the UniqueSmart pointer for the host system. + * \brief Specialization of the UniquePointer for the host system. * * \tparam Object is a type of object to be owned by the pointer. */ @@ -217,7 +217,7 @@ class UniquePointer< Object, Devices::Host > : public SmartPointer } /** - * \brief Assignment operator. + * \brief Move operator. * * It assigns object owned by the pointer \ref ptr to \ref this pointer. * The original pointer \ref ptr is reset to empty state. @@ -233,10 +233,9 @@ class UniquePointer< Object, Devices::Host > : public SmartPointer /** * \brief Cross-device pointer synchronization. * - * This method is usually called by the smart pointers register when calling - * \ref Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >() + * For the smart pointers in the host, this method does nothing. * - * \return true if the synchronization was successful, false otherwise. + * \return true. */ bool synchronize() { @@ -259,7 +258,7 @@ class UniquePointer< Object, Devices::Host > : public SmartPointer }; /** - * \brief Specialization of the UniqueSmart pointer for the CUDA device. + * \brief Specialization of the UniquePointer for the CUDA device. * * \tparam Object is a type of object to be owned by the pointer. */ @@ -438,7 +437,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer } /** - * \brief Assignment operator. + * \brief Move operator. * * It assigns object owned by the pointer \ref ptr to \ref this pointer. * The original pointer \ref ptr is reset to empty state.