From f11da7b1849f932e24cafc144df9337e21e4274c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 25 Nov 2019 14:41:46 +0100
Subject: [PATCH] SharedPointer dereferencing is device sensitive.

---
Note: line breaks and indentation of this patch were restored by hand; use
`git apply --ignore-whitespace` if the context lines do not match exactly.

 src/TNL/Pointers/SharedPointerCuda.h          | 24 +++++++++++++++++++
 .../Pointers/SharedPointerCudaTest.cu         | 21 ++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h
index 54dd4ee3c7..510f172d8e 100644
--- a/src/TNL/Pointers/SharedPointerCuda.h
+++ b/src/TNL/Pointers/SharedPointerCuda.h
@@ -383,30 +383,54 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          return this->allocate( args... );
       }
 
+      // In device code (__CUDA_ARCH__ defined) this returns cuda_pointer, on the host &pd->data.
+      __cuda_callable__
       const Object* operator->() const
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__
+         return this->cuda_pointer;
+#else
          return &this->pd->data;
+#endif
       }
 
+      // Same dispatch as the const overload; only the host branch sets pd->maybe_modified.
+      __cuda_callable__
       Object* operator->()
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__
+         return this->cuda_pointer;
+#else
          this->pd->maybe_modified = true;
          return &this->pd->data;
+#endif
       }
 
+      // In device code (__CUDA_ARCH__ defined) this returns *cuda_pointer, on the host pd->data.
+      __cuda_callable__
       const Object& operator *() const
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__
+         return *( this->cuda_pointer );
+#else
          return this->pd->data;
+#endif
       }
 
+      // Same dispatch as the const overload; only the host branch sets pd->maybe_modified.
+      __cuda_callable__
       Object& operator *()
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__
+         return *( this->cuda_pointer );
+#else
          this->pd->maybe_modified = true;
          return this->pd->data;
+#endif
       }
 
       __cuda_callable__
diff --git a/src/UnitTests/Pointers/SharedPointerCudaTest.cu b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
index 83b6b4793b..d21f4319cc 100644
--- a/src/UnitTests/Pointers/SharedPointerCudaTest.cu
+++ b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
@@ -77,6 +77,18 @@ __global__ void copyArrayKernel( const TNL::Containers::Array< int, Devices::Cud
    }
 }
 
+// Copies the first two elements of the array referenced by inArray into outArray.
+// The shared pointer is passed by value and dereferenced inside the kernel, so the
+// __CUDA_ARCH__ branch of the const SharedPointer::operator* is the one being used.
+// Only threads with threadIdx.x < 2 write; outArray must have room for two ints.
+__global__ void copyArrayKernel2( const Pointers::SharedPointer< TNL::Containers::Array< int, Devices::Cuda > > inArray,
+                                  int* outArray )
+{
+   if( threadIdx.x < 2 )
+   {
+      outArray[ threadIdx.x ] = ( *inArray )[ threadIdx.x ];
+   }
+}
 #endif
 
 TEST( SharedPointerCudaTest, getDataArrayTest )
@@ -100,6 +112,15 @@ TEST( SharedPointerCudaTest, getDataArrayTest )
    ASSERT_EQ( testArray_host[ 0 ], 1 );
    ASSERT_EQ( testArray_host[ 1 ], 2 );
 
+   // Clear the device buffer so that values left over from the check above cannot satisfy the checks below.
+   ASSERT_EQ( cudaMemset( testArray_device, 0, 2 * sizeof( int ) ), cudaSuccess );
+   copyArrayKernel2<<< 1, 2 >>>( ptr, testArray_device );
+   ASSERT_EQ( cudaGetLastError(), cudaSuccess );
+   ASSERT_EQ( cudaMemcpy( testArray_host, testArray_device, 2 * sizeof( int ), cudaMemcpyDeviceToHost ), cudaSuccess );
+
+   ASSERT_EQ( testArray_host[ 0 ], 1 );
+   ASSERT_EQ( testArray_host[ 1 ], 2 );
+
    delete[] testArray_host;
    cudaFree( testArray_device );
 
-- 
GitLab