Commit 7857b230 authored by Jakub Klinkovský
Browse files

Simplified CUDA specializations of setElement and getElement in MemoryOperations

parent f2e51e10
Loading
Loading
Loading
Loading
+8 −24
Original line number Diff line number Diff line
@@ -83,17 +83,11 @@ setElement( Element* data,
#ifdef __CUDA_ARCH__
   *data = value;
#else
#ifdef HAVE_CUDA
   cudaMemcpy( ( void* ) data, ( void* ) &value, sizeof( Element ), cudaMemcpyHostToDevice );
   TNL_CHECK_CUDA_DEVICE;
#else
   throw Exceptions::CudaSupportMissing();
#endif
   // TODO: For some reason the following does not work after adding
   // #ifdef __CUDA_ARCH__ to Array::setElement and ArrayView::setElement.
   // Probably it might be a problem with lambda function 'kernel' which
   // nvcc probably does not handle properly.
   //MemoryOperations< Devices::Cuda >::set( data, value, 1 );
   // NOTE: calling `MemoryOperations< Devices::Cuda >::set( data, value, 1 );`
   // does not work here due to `#ifdef __CUDA_ARCH__` above. It would involve
   // launching a CUDA kernel with an extended lambda, which would be discarded
   // by nvcc (never called).
   MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( data, &value, 1 );
#endif
}

@@ -106,19 +100,9 @@ getElement( const Element* data )
#ifdef __CUDA_ARCH__
   return *data;
#else
   // TODO: For some reason the following does not work after adding
   // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement
   // Probably it might be a problem with lambda function 'kernel' which
   // nvcc probably does not handle properly.
   //MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
   #ifdef HAVE_CUDA
   Element result;
      cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
      TNL_CHECK_CUDA_DEVICE;
   MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, data, 1 );
   return result;
   #else
      throw Exceptions::CudaSupportMissing();
   #endif
#endif
}