diff --git a/Documentation/Tutorials/Pointers/CMakeLists.txt b/Documentation/Tutorials/Pointers/CMakeLists.txt index bf6581c52b4da5d763e189468d8b38d687b4aa9b..0535e8fd5df0c242c4df984a483ec6a34dd32e46 100644 --- a/Documentation/Tutorials/Pointers/CMakeLists.txt +++ b/Documentation/Tutorials/Pointers/CMakeLists.txt @@ -3,6 +3,8 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) CUDA_ADD_EXECUTABLE( SharedPointerExample SharedPointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND SharedPointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out ) + CUDA_ADD_EXECUTABLE( DevicePointerExample DevicePointerExample.cu ) + ADD_CUSTOM_COMMAND( COMMAND DevicePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out ) ELSE() ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) @@ -15,7 +17,8 @@ ADD_CUSTOM_COMMAND( COMMAND UniquePointerHostExample > ${TNL_DOCUMENTATION_OUTPU IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( TutorialsPointersCuda ALL DEPENDS UniquePointerExample.out - SharedPointerExample.out ) + SharedPointerExample.out + DevicePointerExample.out ) ENDIF() ADD_CUSTOM_TARGET( TutorialsPointers ALL DEPENDS diff --git a/Documentation/Tutorials/Pointers/DevicePointerExample.cpp b/Documentation/Tutorials/Pointers/DevicePointerExample.cpp new file mode 100644 index 0000000000000000000000000000000000000000..144ae98b0e57a4aab79eb6d3e4aa20135dc12ca7 --- /dev/null +++ b/Documentation/Tutorials/Pointers/DevicePointerExample.cpp @@ -0,0 +1,54 @@ +#include <iostream> +#include <cstdlib> +#include <TNL/Containers/Array.h> +#include <TNL/Pointers/DevicePointer.h> + +using namespace TNL; + +using ArrayCuda = Containers::Array< int, Devices::Cuda >; + +struct Tuple +{ + Tuple( ArrayCuda& _a1, ArrayCuda& _a2 ): + a1( _a1 ), a2( _a2 ){}; + + Pointers::DevicePointer< ArrayCuda > a1, a2; +}; + +__global__ void printTuple( const Tuple t ) +{ + printf( "Tuple size is: %d\n", t.a1->getSize() ); + for( int i = 0; i < t.a1->getSize(); i++ ) + { + printf( "a1[ %d ] = %d \n", i, ( *t.a1 )[ i ] ); + printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] ); + } +} + +int main( int argc, char* argv[] ) +{ + /*** + * Create a tuple of arrays and print them in CUDA kernel + */ +#ifdef HAVE_CUDA + ArrayCuda a1( 3 ), a2( 3 ); + Tuple t( a1, a2 ); + a1 = 1; + a2 = 2; + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + printTuple<<< 1, 1 >>>( t ); + + /*** + * Resize the arrays + */ + a1.setSize( 5 ); + a2.setSize( 5 ); + a1 = 3; + a2 = 4; + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + printTuple<<< 1, 1 >>>( t ); +#endif + return EXIT_SUCCESS; + +} + diff --git a/Documentation/Tutorials/Pointers/DevicePointerExample.cu b/Documentation/Tutorials/Pointers/DevicePointerExample.cu new file mode 120000 index 0000000000000000000000000000000000000000..b17ef30da03880e80daeb03fb1506bdc00e9b832 --- /dev/null +++ b/Documentation/Tutorials/Pointers/DevicePointerExample.cu @@ -0,0 +1 @@ +DevicePointerExample.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Pointers/SharedPointerExample.cpp b/Documentation/Tutorials/Pointers/SharedPointerExample.cpp index 5facaf2016949d2121d75edb779a1ea9fd03ce8e..287aae8e8dd2f9faf3c2ebeb86670f5e77f489a0 100644 --- a/Documentation/Tutorials/Pointers/SharedPointerExample.cpp +++ b/Documentation/Tutorials/Pointers/SharedPointerExample.cpp @@ -34,7 +34,7 @@ __global__ void printTuple( const Tuple t ) int main( int argc, char* argv[] ) { /*** - * Create a tuple of arrays and print the in CUDA kernel + * Create a tuple of arrays and print them in CUDA kernel */ #ifdef HAVE_CUDA Tuple t( 3 ); @@ -44,7 +44,7 @@ int main( int argc, char* argv[] ) printTuple<<< 1, 1 >>>( t ); /*** - * Resize the array + * Resize the arrays */ t.setSize( 5 ); *t.a1 = 3; diff --git a/Documentation/Tutorials/Pointers/tutorial_Pointers.md b/Documentation/Tutorials/Pointers/tutorial_Pointers.md index d8fbef6eec6fda8cd2957fbc34b95fb0c8e74b2a..f9ef457e4d65ff0735f6b66c08e615bb2d281062 100644 --- a/Documentation/Tutorials/Pointers/tutorial_Pointers.md +++ b/Documentation/Tutorials/Pointers/tutorial_Pointers.md @@ -77,4 +77,10 @@ One of the differences between `UniquePointer` and `SmartPointer` is that the `S ## Device pointers <a name="device_pointers"></a> -The last type of the smart pointer implemented in TNL is `DevicePointer`. It works the same way as `SharedPointer` but it does not create new object on the host system. `DevicePointer` is therefore useful in situation when there is already an object created in the host memory and we want to create its image even on the device. Both images are linked one with each other and so one can just manipulate the one on the host and then synchronize it on the device. +The last type of the smart pointer implemented in TNL is `DevicePointer`. It works the same way as `SharedPointer` but it does not create new object on the host system. `DevicePointer` is therefore useful in situation when there is already an object created in the host memory and we want to create its image even on the device. Both images are linked one with each other and so one can just manipulate the one on the host and then synchronize it on the device. The following listing is a modification of the previous example with tuple: + +\include DevicePointerExample.cpp + +The result looks the same: + +\include DevicePointerExample.out diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index 5276c3ed465938e7e7fcdfde2885dc8986cac3b5..b72aaf9b1a38618433e713cc6a4151e3f503a95c 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -267,26 +267,46 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer pointer.cuda_pointer = nullptr; } + __cuda_callable__ const Object* operator->() const { +#ifdef __CUDA_ARCH__ + return this->cuda_pointer; +#else return this->pointer; +#endif } + __cuda_callable__ Object* operator->() { +#ifdef __CUDA_ARCH__ + return this->cuda_pointer; +#else this->pd->maybe_modified = true; return this->pointer; +#endif } + __cuda_callable__ const Object& operator *() const { +#ifdef __CUDA_ARCH__ + return *( this->cuda_pointer ); +#else return *( this->pointer ); +#endif } + __cuda_callable__ Object& operator *() { +#ifdef __CUDA_ARCH__ + return *( this->cuda_pointer ); +#else this->pd->maybe_modified = true; return *( this->pointer ); +#endif } __cuda_callable__