From f11da7b1849f932e24cafc144df9337e21e4274c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 25 Nov 2019 14:41:46 +0100
Subject: [PATCH] SharedPointer dereferencing is device sensitive.

---
 src/TNL/Pointers/SharedPointerCuda.h          | 20 +++++++++++++++++++
 .../Pointers/SharedPointerCudaTest.cu         | 14 +++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h
index 54dd4ee3c7..510f172d8e 100644
--- a/src/TNL/Pointers/SharedPointerCuda.h
+++ b/src/TNL/Pointers/SharedPointerCuda.h
@@ -383,30 +383,50 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          return this->allocate( args... );
       }
 
+      __cuda_callable__
       const Object* operator->() const
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__  // defined only while compiling device code
+         return this->cuda_pointer;  // NOTE(review): presumably the device-side copy of the object - confirm
+#else  // host compilation pass
          return &this->pd->data;
+#endif  // __CUDA_ARCH__
       }
 
+      __cuda_callable__
       Object* operator->()
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__  // defined only while compiling device code
+         return this->cuda_pointer;  // device branch returns without touching pd->maybe_modified
+#else  // host compilation pass: flag the object as possibly modified
          this->pd->maybe_modified = true;
          return &this->pd->data;
+#endif  // __CUDA_ARCH__
       }
 
+      __cuda_callable__
       const Object& operator *() const
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__  // defined only while compiling device code
+         return *( this->cuda_pointer );  // NOTE(review): presumably the device-side copy of the object - confirm
+#else  // host compilation pass
          return this->pd->data;
+#endif  // __CUDA_ARCH__
       }
 
+      __cuda_callable__
       Object& operator *()
       {
          TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+#ifdef __CUDA_ARCH__  // defined only while compiling device code
+         return *( this->cuda_pointer );  // device branch returns without touching pd->maybe_modified
+#else  // host compilation pass: flag the object as possibly modified
          this->pd->maybe_modified = true;
          return this->pd->data;
+#endif  // __CUDA_ARCH__
       }
 
       __cuda_callable__
diff --git a/src/UnitTests/Pointers/SharedPointerCudaTest.cu b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
index 83b6b4793b..d21f4319cc 100644
--- a/src/UnitTests/Pointers/SharedPointerCudaTest.cu
+++ b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
@@ -77,6 +77,14 @@ __global__ void copyArrayKernel( const TNL::Containers::Array< int, Devices::Cud
    }
 }
 
+__global__ void copyArrayKernel2( const Pointers::SharedPointer< TNL::Containers::Array< int, Devices::Cuda > > inArray,  // smart pointer passed by value
+                                  int* outArray )  // written at indices 0 and 1 only
+{
+   if( threadIdx.x < 2 )  // guard: only threads 0 and 1 copy an element
+   {
+      outArray[ threadIdx.x ] = ( *inArray )[ threadIdx.x ];  // dereferences the SharedPointer inside device code
+   }
+}
 #endif
 
 TEST( SharedPointerCudaTest, getDataArrayTest )
@@ -100,6 +108,12 @@ TEST( SharedPointerCudaTest, getDataArrayTest )
    ASSERT_EQ( testArray_host[ 0 ], 1 );
    ASSERT_EQ( testArray_host[ 1 ], 2 );
 
+   copyArrayKernel2<<< 1, 2 >>>( ptr, testArray_device );
+   cudaMemcpy( testArray_host, testArray_device, 2 * sizeof( int ), cudaMemcpyDeviceToHost );
+
+   ASSERT_EQ( testArray_host[ 0 ], 1 );
+   ASSERT_EQ( testArray_host[ 1 ], 2 );
+
    delete[] testArray_host;
    cudaFree( testArray_device );
 
-- 
GitLab