Commit 008601ad authored by Matouš Fencl's avatar Matouš Fencl Committed by Tomáš Oberhuber

Fix 2D GPU neighbours. Version with Chess method and OpenMP FSM methods.

parent f2dc4517
......@@ -365,13 +365,13 @@ __global__ void GetNeighbours( const TNL::Containers::Array< int, Devices::Cuda,
m = i%numBlockX;
k = i/numBlockX;
if( m > 0 && blockCalculationIndicator[ i - 1 ] ){
pom = 1;//BlockIterPom[ i ] = 1;
pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
}else if( m < numBlockX -1 && blockCalculationIndicator[ i + 1 ] ){
pom = 1;//BlockIterPom[ i ] = 1;
}else if( k > 0 && blockCalculationIndicatorHelp[ i - numBlockX ] ){
pom = 1;// BlockIterPom[ i ] = 1;
pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
}else if( k > 0 && blockCalculationIndicator[ i - numBlockX ] ){
pom = 1;// blockCalculationIndicatorHelp[ i ] = 1;
}else if( k < numBlockY -1 && blockCalculationIndicator[ i + numBlockX ] ){
pom = 1;//BlockIterPom[ i ] = 1;
pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
}
if( blockCalculationIndicator[ i ] != 1 )
......
......@@ -109,7 +109,7 @@ initInterface( const MeshFunctionPointer& _input,
output[ cell.getIndex() ] = pom;
pom = pom - TNL::sign( c )*hx;
if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
output[ e ] = pom; //( hy * c )/( c - input[ n ]) - hy;
output[ e ] = pom;
interfaceMap[ cell.getIndex() ] = true;
interfaceMap[ e ] = true;
......@@ -122,7 +122,7 @@ initInterface( const MeshFunctionPointer& _input,
output[ cell.getIndex() ] = pom;
pom = pom - TNL::sign( c )*hz;
if( TNL::abs( output[ t ] ) > TNL::abs( pom ) )
output[ t ] = pom; //( hy * c )/( c - input[ n ]) - hy;
output[ t ] = pom;
interfaceMap[ cell.getIndex() ] = true;
interfaceMap[ t ] = true;
......@@ -736,7 +736,7 @@ updateBlocks( const InterfaceMapType interfaceMap,
MeshFunctionType& helpFunc,
ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
{
//#pragma omp parallel for schedule( dynamic )
#pragma omp parallel for schedule( dynamic )
for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
{
if( BlockIterHost[ i ] )
......
......@@ -363,7 +363,7 @@ solve( const MeshPointer& mesh,
interfaceMapPtr.template getData< Device >(),
auxPtr.template getData< Device>(),
helpFunc.template modifyData< Device>(),
blockCalculationIndicator,
blockCalculationIndicator, vecLowerOverlaps, vecUpperOverlaps,
oddEvenBlock );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
......@@ -381,15 +381,8 @@ solve( const MeshPointer& mesh,
oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
CudaParallelReduc<<< nBlocks , 1024 >>>( blockCalculationIndicator, dBlock, ( numBlocksX * numBlocksY ) );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
BlockIterD = dBlock.getElement( 0 );*/
calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1);
*/
/**------------------------------------------------------------------------------------------------*/
......@@ -441,7 +434,7 @@ solve( const MeshPointer& mesh,
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
// "Parallel reduction" to see if we should calculate again BlockIterD
// "Parallel reduction" to see if we should calculate again calculateCudaBlocksAgain
calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1);
// When we change something then we should caclucate again in the next passage of MPI ( calculated = true )
......
......@@ -103,8 +103,30 @@ solve( const MeshPointer& mesh,
calculateMPIAgain = 0;
/** HERE IS FSM FOR OPENMP (NO MPI) - isnt worthy */
/*int numThreadsPerBlock = 64;
/*int numThreadsPerBlock = -1;
numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0));
//printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
if( numThreadsPerBlock <= 16 )
numThreadsPerBlock = 16;
else if(numThreadsPerBlock <= 32 )
numThreadsPerBlock = 32;
else if(numThreadsPerBlock <= 64 )
numThreadsPerBlock = 64;
else if(numThreadsPerBlock <= 128 )
numThreadsPerBlock = 128;
else if(numThreadsPerBlock <= 256 )
numThreadsPerBlock = 256;
else if(numThreadsPerBlock <= 512 )
numThreadsPerBlock = 512;
else
numThreadsPerBlock = 1024;
//printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
if( numThreadsPerBlock == -1 ){
printf("Fail in setting numThreadsPerBlock.\n");
break;
}
int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0);
int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0);
......@@ -140,8 +162,22 @@ solve( const MeshPointer& mesh,
helpFunc1 = auxPtr;
auxPtr = helpFunc;
helpFunc = helpFunc1;
switch ( numThreadsPerBlock ){
case 16:
this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
case 32:
this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
case 64:
this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
case 128:
this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
case 256:
this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
case 512:
this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
default:
this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
}
//Reduction
for( int i = 0; i < BlockIterHost.getSize(); i++ ){
if( IsCalculationDone == 0 ){
......@@ -176,43 +212,43 @@ solve( const MeshPointer& mesh,
// TOP, NORTH and WEST
boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
// TOP, SOUTH and EAST
boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
// TOP, SOUTH and WEST
boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
// BOTTOM, NOTH and EAST
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
// BOTTOM, NOTH and WEST
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
// BOTTOM, SOUTH and EAST
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
// BOTTOM, SOUTH and WEST
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment