diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h index 583e22478e71a66f9909f589f3ee049b2399ff79..50ea7bde8ee87b24317363feb78e30d45128fa3b 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h @@ -365,13 +365,13 @@ __global__ void GetNeighbours( const TNL::Containers::Array< int, Devices::Cuda, m = i%numBlockX; k = i/numBlockX; if( m > 0 && blockCalculationIndicator[ i - 1 ] ){ - pom = 1;//BlockIterPom[ i ] = 1; + pom = 1;//blockCalculationIndicatorHelp[ i ] = 1; }else if( m < numBlockX -1 && blockCalculationIndicator[ i + 1 ] ){ - pom = 1;//BlockIterPom[ i ] = 1; - }else if( k > 0 && blockCalculationIndicatorHelp[ i - numBlockX ] ){ - pom = 1;// BlockIterPom[ i ] = 1; + pom = 1;//blockCalculationIndicatorHelp[ i ] = 1; + }else if( k > 0 && blockCalculationIndicator[ i - numBlockX ] ){ + pom = 1;// blockCalculationIndicatorHelp[ i ] = 1; }else if( k < numBlockY -1 && blockCalculationIndicator[ i + numBlockX ] ){ - pom = 1;//BlockIterPom[ i ] = 1; + pom = 1;//blockCalculationIndicatorHelp[ i ] = 1; } if( blockCalculationIndicator[ i ] != 1 ) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h index 91f9a0efef1fd3fedb105db239fb48eaf01e95ff..5b2a4b685b75a4bb616631b139bca7f1215b4c0b 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h @@ -109,7 +109,7 @@ initInterface( const MeshFunctionPointer& _input, output[ cell.getIndex() ] = pom; pom = pom - TNL::sign( c )*hx; if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) - output[ e ] = pom; //( hy * c )/( c - input[ n ]) - hy; + output[ e ] = pom; interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; @@ -122,7 +122,7 @@ initInterface( const MeshFunctionPointer& _input, output[ cell.getIndex() ] = pom; pom = pom - TNL::sign( c )*hz; if( TNL::abs( output[ t ] ) > TNL::abs( pom ) ) - output[ t ] = pom; //( hy * c )/( c - input[ n ]) - hy; + output[ t ] = pom; interfaceMap[ cell.getIndex() ] = true; interfaceMap[ t ] = true; @@ -736,7 +736,7 @@ updateBlocks( const InterfaceMapType interfaceMap, MeshFunctionType& helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { - //#pragma omp parallel for schedule( dynamic ) + #pragma omp parallel for schedule( dynamic ) for( IndexType i = 0; i < BlockIterHost.getSize(); i++ ) { if( BlockIterHost[ i ] ) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 66f9e6cdf2efcefb1b5e30b559ae301aa2af54ac..31d3f8b324a7fc2a02d94e54b5909190cacd965d 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -363,7 +363,7 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), - blockCalculationIndicator, + blockCalculationIndicator, vecLowerOverlaps, vecUpperOverlaps, oddEvenBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; @@ -381,15 +381,8 @@ solve( const MeshPointer& mesh, oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - CudaParallelReduc<<< nBlocks , 1024 >>>( blockCalculationIndicator, dBlock, ( numBlocksX * numBlocksY ) ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - BlockIterD = dBlock.getElement( 0 );*/ - + calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1); + */ /**------------------------------------------------------------------------------------------------*/ @@ -441,7 +434,7 @@ solve( const MeshPointer& mesh, cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - // "Parallel reduction" to see if we should calculate again BlockIterD + // "Parallel reduction" to see if we should calculate again calculateCudaBlocksAgain calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1); // When we change something then we should caclucate again in the next passage of MPI ( calculated = true ) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 2d73b174edd2ea69e1253f9eff67b9be18bd9b67..4895c7693772a9f33458a119506668a493996ec0 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -103,8 +103,30 @@ solve( const MeshPointer& mesh, calculateMPIAgain = 0; /** HERE IS FSM FOR OPENMP (NO MPI) - isnt worthy */ - /*int numThreadsPerBlock = 64; + /*int numThreadsPerBlock = -1; + numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0)); + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + if( numThreadsPerBlock <= 16 ) + numThreadsPerBlock = 16; + else if(numThreadsPerBlock <= 32 ) + numThreadsPerBlock = 32; + else if(numThreadsPerBlock <= 64 ) + numThreadsPerBlock = 64; + else if(numThreadsPerBlock <= 128 ) + numThreadsPerBlock = 128; + else if(numThreadsPerBlock <= 256 ) + numThreadsPerBlock = 256; + else if(numThreadsPerBlock <= 512 ) + numThreadsPerBlock = 512; + else + numThreadsPerBlock = 1024; + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + + if( numThreadsPerBlock == -1 ){ + printf("Fail in setting numThreadsPerBlock.\n"); + break; + } int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); @@ -140,8 +162,22 @@ solve( const MeshPointer& mesh, helpFunc1 = auxPtr; auxPtr = helpFunc; helpFunc = helpFunc1; + switch ( numThreadsPerBlock ){ + case 16: + this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); + case 32: + this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); + case 64: this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); - + case 128: + this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); + case 256: + this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); + case 512: + this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); + default: + this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock ); + } //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ if( IsCalculationDone == 0 ){ @@ -176,43 +212,43 @@ solve( const MeshPointer& mesh, // TOP, NORTH and WEST boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2]; boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1]; - boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; + boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); // TOP, SOUTH and EAST boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2]; - boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1]; + boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1]; boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); // TOP, SOUTH and WEST boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2]; - boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1]; - boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; + boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1]; + boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); // BOTTOM, NOTH and EAST - boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2]; + boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2]; boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1]; boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); // BOTTOM, NOTH and WEST - boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2]; + boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2]; boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1]; - boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; + boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); // BOTTOM, SOUTH and EAST - boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2]; - boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1]; + boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2]; + boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1]; boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); // BOTTOM, SOUTH and WEST - boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2]; - boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1]; - boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; + boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2]; + boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1]; + boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );