From 537407ec9a19e066f2711679c1cb8b5267994d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Mon, 25 Nov 2019 16:58:41 +0100 Subject: [PATCH] Writing documentation on DevicePointer. --- .../Tutorials/Pointers/CMakeLists.txt | 5 +- .../Pointers/DevicePointerExample.cpp | 54 +++++++++++++++++++ .../Pointers/DevicePointerExample.cu | 1 + .../Pointers/SharedPointerExample.cpp | 4 +- .../Tutorials/Pointers/tutorial_Pointers.md | 8 ++- src/TNL/Pointers/DevicePointer.h | 20 +++++++ 6 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 Documentation/Tutorials/Pointers/DevicePointerExample.cpp create mode 120000 Documentation/Tutorials/Pointers/DevicePointerExample.cu diff --git a/Documentation/Tutorials/Pointers/CMakeLists.txt b/Documentation/Tutorials/Pointers/CMakeLists.txt index bf6581c52b..0535e8fd5d 100644 --- a/Documentation/Tutorials/Pointers/CMakeLists.txt +++ b/Documentation/Tutorials/Pointers/CMakeLists.txt @@ -3,6 +3,8 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) CUDA_ADD_EXECUTABLE( SharedPointerExample SharedPointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND SharedPointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out ) + CUDA_ADD_EXECUTABLE( DevicePointerExample DevicePointerExample.cu ) + ADD_CUSTOM_COMMAND( COMMAND DevicePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out ) ELSE() ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) @@ -15,7 +17,8 @@ ADD_CUSTOM_COMMAND( COMMAND UniquePointerHostExample > ${TNL_DOCUMENTATION_OUTPU IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( TutorialsPointersCuda ALL DEPENDS UniquePointerExample.out - SharedPointerExample.out ) + SharedPointerExample.out + DevicePointerExample.out ) ENDIF() ADD_CUSTOM_TARGET( TutorialsPointers ALL DEPENDS diff --git a/Documentation/Tutorials/Pointers/DevicePointerExample.cpp b/Documentation/Tutorials/Pointers/DevicePointerExample.cpp new file mode 100644 index 0000000000..144ae98b0e --- /dev/null +++ b/Documentation/Tutorials/Pointers/DevicePointerExample.cpp @@ -0,0 +1,54 @@ +#include <iostream> +#include <cstdlib> +#include <TNL/Containers/Array.h> +#include <TNL/Pointers/DevicePointer.h> + +using namespace TNL; + +using ArrayCuda = Containers::Array< int, Devices::Cuda >; + +struct Tuple +{ + Tuple( ArrayCuda& _a1, ArrayCuda& _a2 ): + a1( _a1 ), a2( _a2 ){}; + + Pointers::DevicePointer< ArrayCuda > a1, a2; +}; + +__global__ void printTuple( const Tuple t ) +{ + printf( "Tuple size is: %d\n", t.a1->getSize() ); + for( int i = 0; i < t.a1->getSize(); i++ ) + { + printf( "a1[ %d ] = %d \n", i, ( *t.a1 )[ i ] ); + printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] ); + } +} + +int main( int argc, char* argv[] ) +{ + /*** + * Create a tuple of arrays and print them in CUDA kernel + */ +#ifdef HAVE_CUDA + ArrayCuda a1( 3 ), a2( 3 ); + Tuple t( a1, a2 ); + a1 = 1; + a2 = 2; + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + printTuple<<< 1, 1 >>>( t ); + + /*** + * Resize the arrays + */ + a1.setSize( 5 ); + a2.setSize( 5 ); + a1 = 3; + a2 = 4; + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + printTuple<<< 1, 1 >>>( t ); +#endif + return EXIT_SUCCESS; + +} + diff --git a/Documentation/Tutorials/Pointers/DevicePointerExample.cu b/Documentation/Tutorials/Pointers/DevicePointerExample.cu new file mode 120000 index 0000000000..b17ef30da0 --- /dev/null +++ b/Documentation/Tutorials/Pointers/DevicePointerExample.cu @@ -0,0 +1 @@ +DevicePointerExample.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Pointers/SharedPointerExample.cpp b/Documentation/Tutorials/Pointers/SharedPointerExample.cpp index 5facaf2016..287aae8e8d 100644 --- a/Documentation/Tutorials/Pointers/SharedPointerExample.cpp +++ b/Documentation/Tutorials/Pointers/SharedPointerExample.cpp @@ -34,7 +34,7 @@ __global__ void printTuple( const Tuple t ) int main( int argc, char* argv[] ) { /*** - * Create a tuple of arrays and print the in CUDA kernel + * Create a tuple of arrays and print them in CUDA kernel */ #ifdef HAVE_CUDA Tuple t( 3 ); @@ -44,7 +44,7 @@ int main( int argc, char* argv[] ) printTuple<<< 1, 1 >>>( t ); /*** - * Resize the array + * Resize the arrays */ t.setSize( 5 ); *t.a1 = 3; diff --git a/Documentation/Tutorials/Pointers/tutorial_Pointers.md b/Documentation/Tutorials/Pointers/tutorial_Pointers.md index d8fbef6eec..f9ef457e4d 100644 --- a/Documentation/Tutorials/Pointers/tutorial_Pointers.md +++ b/Documentation/Tutorials/Pointers/tutorial_Pointers.md @@ -77,4 +77,10 @@ One of the differences between `UniquePointer` and `SmartPointer` is that the `S ## Device pointers <a name="device_pointers"></a> -The last type of the smart pointer implemented in TNL is `DevicePointer`. It works the same way as `SharedPointer` but it does not create new object on the host system. `DevicePointer` is therefore useful in situation when there is already an object created in the host memory and we want to create its image even on the device. Both images are linked one with each other and so one can just manipulate the one on the host and then synchronize it on the device. +The last type of the smart pointer implemented in TNL is `DevicePointer`. It works the same way as `SharedPointer` but it does not create new object on the host system. `DevicePointer` is therefore useful in situation when there is already an object created in the host memory and we want to create its image even on the device. Both images are linked one with each other and so one can just manipulate the one on the host and then synchronize it on the device. The following listing is a modification of the previous example with tuple: + +\include DevicePointerExample.cpp + +The result looks the same: + +\include DevicePointerExample.out diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index 5276c3ed46..b72aaf9b1a 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -267,26 +267,46 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer pointer.cuda_pointer = nullptr; } + __cuda_callable__ const Object* operator->() const { +#ifdef __CUDA_ARCH__ + return this->cuda_pointer; +#else return this->pointer; +#endif } + __cuda_callable__ Object* operator->() { +#ifdef __CUDA_ARCH__ + return this->cuda_pointer; +#else this->pd->maybe_modified = true; return this->pointer; +#endif } + __cuda_callable__ const Object& operator *() const { +#ifdef __CUDA_ARCH__ + return *( this->cuda_pointer ); +#else return *( this->pointer ); +#endif } + __cuda_callable__ Object& operator *() { +#ifdef __CUDA_ARCH__ + return *( this->cuda_pointer ); +#else this->pd->maybe_modified = true; return *( this->pointer ); +#endif } __cuda_callable__ -- GitLab