Commit 7af752a5 authored by Matouš Fencl, committed by Tomáš Oberhuber

DeepCopy removed from CUDA

parent 204f7f1d
...@@ -465,17 +465,31 @@ solve( const MeshPointer& mesh, ...@@ -465,17 +465,31 @@ solve( const MeshPointer& mesh,
// Helping meshFunction that switches with AuxPtr in every calculation of CudaUpdateCellCaller<<<>>>() // Helping meshFunction that switches with AuxPtr in every calculation of CudaUpdateCellCaller<<<>>>()
MeshFunctionPointer helpFunc( mesh ); MeshFunctionPointer helpFunc( mesh );
helpFunc.template modifyData() = auxPtr.template getData();
Devices::Cuda::synchronizeDevice();
//MeshFunctionPointer helpFunc1( mesh ); //MeshFunctionPointer helpFunc1( mesh );
// Setting number of threads and blocks in grid for DeepCopy of meshFunction // Setting number of threads and blocks in grid for DeepCopy of meshFunction
int numBlocksXWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); /*int numBlocksXWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
int numBlocksYWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); int numBlocksYWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
dim3 gridSizeWithoutOverlaps( numBlocksXWithoutOverlaps, numBlocksYWithoutOverlaps ); dim3 gridSizeWithoutOverlaps( numBlocksXWithoutOverlaps, numBlocksYWithoutOverlaps );
Devices::Cuda::synchronizeDevice();
DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(), DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), 1, i ); helpFunc.template modifyData< Device>(), 1, i );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
Devices::Cuda::synchronizeDevice();
DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), 0, i );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;*/
#if ForDebug #if ForDebug
/*int numBlocksXWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
int numBlocksYWithoutOverlaps = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
dim3 gridSizeWithoutOverlaps( numBlocksXWithoutOverlaps, numBlocksYWithoutOverlaps );*/
DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(), DeepCopy<<< gridSizeWithoutOverlaps, blockSize >>>( auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), 0, i ); helpFunc.template modifyData< Device>(), 0, i );
#endif #endif
...@@ -536,7 +550,7 @@ solve( const MeshPointer& mesh, ...@@ -536,7 +550,7 @@ solve( const MeshPointer& mesh,
Devices::Cuda::synchronizeDevice(); Devices::Cuda::synchronizeDevice();
CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr,
interfaceMapPtr.template getData< Device >(), interfaceMapPtr.template getData< Device >(),
auxPtr.template modifyData< Device>(), auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(), helpFunc.template modifyData< Device>(),
BlockIterDevice, vLower, vUpper, i ); BlockIterDevice, vLower, vUpper, i );
cudaDeviceSynchronize(); cudaDeviceSynchronize();
...@@ -701,7 +715,7 @@ __global__ void DeepCopy( const Functions::MeshFunction< Meshes::Grid< 2, Real, ...@@ -701,7 +715,7 @@ __global__ void DeepCopy( const Functions::MeshFunction< Meshes::Grid< 2, Real,
const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.template getMesh< Devices::Cuda >(); const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.template getMesh< Devices::Cuda >();
if( copy ){ if( copy ){
if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
helpFunc[ j * mesh.getDimensions().x() + i ] = aux[ j * mesh.getDimensions().x() + i ]; helpFunc[ j * mesh.getDimensions().x() + i ] = 1;//aux[ j * mesh.getDimensions().x() + i ];
} }
else else
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment