Skip to content
Snippets Groups Projects
Commit 7af752a5 authored by Matouš Fencl, committed by Tomáš Oberhuber
Browse files

DeepCopy removed from CUDA

parent 204f7f1d
No related branches found
No related tags found
1 merge request: !43 "Hamilton jacobi rebase"
......@@ -465,17 +465,31 @@ solve( const MeshPointer& mesh,
// Helper mesh function that switches roles with auxPtr on every launch of CudaUpdateCellCaller<<<>>>()
MeshFunctionPointer helpFunc( mesh );
helpFunc.template modifyData() = auxPtr.template getData();
Devices::Cuda::synchronizeDevice();
//MeshFunctionPointer helpFunc1( mesh );
// Setting number of threads and blocks in grid for DeepCopy of meshFunction
int numBlocksXWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
/*int numBlocksXWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
int numBlocksYWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
dim3 gridSizeWithoutOverlaps( numBlocksXWithoutOverlaps, numBlocksYWithoutOverlaps );
Devices::Cuda::synchronizeDevice();
DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), 1, i );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
Devices::Cuda::synchronizeDevice();
DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), 0, i );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;*/
#if ForDebug
/*int numBlocksXWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
int numBlocksYWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
dim3 gridSizeWithoutOverlaps( numBlocksXWithoutOverlaps, numBlocksYWithoutOverlaps );*/
DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), 0, i );
#endif
......@@ -536,7 +550,7 @@ solve( const MeshPointer& mesh,
Devices::Cuda::synchronizeDevice();
CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr,
interfaceMapPtr.template getData< Device >(),
auxPtr.template modifyData< Device>(),
auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(),
BlockIterDevice, vLower, vUpper, i );
cudaDeviceSynchronize();
......@@ -701,7 +715,7 @@ __global__ void DeepCopy( const Functions::MeshFunction< Meshes::Grid< 2, Real,
const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.template getMesh< Devices::Cuda >();
if( copy ){
if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
helpFunc[ j * mesh.getDimensions().x() + i ] = aux[ j * mesh.getDimensions().x() + i ];
helpFunc[ j * mesh.getDimensions().x() + i ] = 1;//aux[ j * mesh.getDimensions().x() + i ];
}
else
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment