From 337fb81371d599ccd8910923da59eb021226fa1f Mon Sep 17 00:00:00 2001
From: Tomas Sobotik <sobotto4@fjfi.cvut.cz>
Date: Mon, 21 Mar 2016 15:34:03 +0100
Subject: [PATCH] 3D parallel iterative solver almost fully fixed

---
 examples/fast-sweeping/fastSweepingConfig.h |   2 +-
 examples/fast-sweeping/main.h               |   6 +-
 examples/hamilton-jacobi-parallel/main.h    | 125 ++++++++++++------
 .../parallelEikonalConfig.h                 |   1 +
 .../tnlParallelEikonalSolver2D_impl.h       |  18 +--
 .../tnlParallelEikonalSolver3D_impl.h       |  89 ++++++-------
 .../parallelGodunovEikonal3D_impl.h         |  51 +++----
 7 files changed, 157 insertions(+), 135 deletions(-)

diff --git a/examples/fast-sweeping/fastSweepingConfig.h b/examples/fast-sweeping/fastSweepingConfig.h
index 02d639a14e..ac83f3ee1e 100644
--- a/examples/fast-sweeping/fastSweepingConfig.h
+++ b/examples/fast-sweeping/fastSweepingConfig.h
@@ -29,7 +29,7 @@ class fastSweepingConfig
       config.addDelimiter( "Parallel Eikonal solver settings:" );
       config.addEntry < tnlString > ( "problem-name", "This defines particular problem.", "fast-sweeping" );
       config.addRequiredEntry < tnlString > ( "initial-condition", "Initial condition for solver");
-      config.addRequiredEntry < tnlString > ( "dim", "Dimension of problem.");
+      config.addRequiredEntry < int > ( "dim", "Dimension of problem.");
       config.addEntry < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" );
       config.addEntry < tnlString > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" );
    }
diff --git a/examples/fast-sweeping/main.h b/examples/fast-sweeping/main.h
index b233ca88b8..2732bf9c4e 100644
--- a/examples/fast-sweeping/main.h
+++ b/examples/fast-sweeping/main.h
@@ -43,9 +43,9 @@ int main( int argc, char* argv[] )
    if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
       return false;
 
-   const tnlString& dim = parameters.getParameter< tnlString >( "dim" );
+   const int& dim = parameters.getParameter< int >( "dim" );
 
-   if(dim == "2")
+   if(dim == 2)
    {
       tnlFastSweeping<tnlGrid<2,double,tnlHost, int>, double, int> solver;
       if(!solver.init(parameters))
@@ -58,7 +58,7 @@ int main( int argc, char* argv[] )
       cout << "Starting solver..." << endl;
       solver.run();
    }
-   else if(dim == "3")
+   else if(dim == 3)
    {
       tnlFastSweeping<tnlGrid<3,double,tnlHost, int>, double, int> solver;
       if(!solver.init(parameters))
diff --git a/examples/hamilton-jacobi-parallel/main.h b/examples/hamilton-jacobi-parallel/main.h
index a34ec1fafb..94c7675e9a 100644
--- a/examples/hamilton-jacobi-parallel/main.h
+++ b/examples/hamilton-jacobi-parallel/main.h
@@ -44,45 +44,94 @@ int main( int argc, char* argv[] )
    tnlDeviceEnum device;
    device = tnlHostDevice;
 
-   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,tnlHost, int>, double, int > SchemeTypeHost;
-/*#ifdef HAVE_CUDA
-   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
-#endif
-#ifndef HAVE_CUDA*/
-   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,tnlHost, int>, double, int > SchemeTypeDevice;
-/*#endif*/
-
-   if(device==tnlHostDevice)
-   {
-      typedef tnlHost Device;
-
-
-      tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-      if(!solver.init(parameters))
-      {
-         cerr << "Solver failed to initialize." << endl;
-         return EXIT_FAILURE;
-      }
-      cout << "-------------------------------------------------------------" << endl;
-      cout << "Starting solver loop..." << endl;
-      solver.run();
-   }
-   else if(device==tnlCudaDevice )
-   {
-      typedef tnlCuda Device;
-      //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
-
-      tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-      if(!solver.init(parameters))
-      {
-         cerr << "Solver failed to initialize." << endl;
-         return EXIT_FAILURE;
-      }
-      cout << "-------------------------------------------------------------" << endl;
-      cout << "Starting solver loop..." << endl;
-      solver.run();
-   }
+   const int& dim = parameters.getParameter< int >( "dim" );
+
+   if(dim == 2)
+   {
+
+      typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeHost;
+   /*#ifdef HAVE_CUDA
+      typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
+   #endif
+   #ifndef HAVE_CUDA*/
+      typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeDevice;
+   /*#endif*/
+
+      if(device==tnlHostDevice)
+      {
+         typedef tnlHost Device;
+
+
+         tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
+         if(!solver.init(parameters))
+         {
+            cerr << "Solver failed to initialize." << endl;
+            return EXIT_FAILURE;
+         }
+         cout << "-------------------------------------------------------------" << endl;
+         cout << "Starting solver loop..." << endl;
+         solver.run();
+      }
+      else if(device==tnlCudaDevice )
+      {
+         typedef tnlCuda Device;
+         //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
+
+         tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
+         if(!solver.init(parameters))
+         {
+            cerr << "Solver failed to initialize." << endl;
+            return EXIT_FAILURE;
+         }
+         cout << "-------------------------------------------------------------" << endl;
+         cout << "Starting solver loop..." << endl;
+         solver.run();
+      }
+//   }
+   }
+   else if(dim == 3)
+   {
+
+      typedef parallelGodunovEikonalScheme< tnlGrid<3,double,tnlHost, int>, double, int > SchemeTypeHost;
+   /*#ifdef HAVE_CUDA
+      typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
+   #endif
+   #ifndef HAVE_CUDA*/
+      typedef parallelGodunovEikonalScheme< tnlGrid<3,double,tnlHost, int>, double, int > SchemeTypeDevice;
+   /*#endif*/
+
+      if(device==tnlHostDevice)
+      {
+         typedef tnlHost Device;
+
+
+         tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
+         if(!solver.init(parameters))
+         {
+            cerr << "Solver failed to initialize." << endl;
+            return EXIT_FAILURE;
+         }
+         cout << "-------------------------------------------------------------" << endl;
+         cout << "Starting solver loop..." << endl;
+         solver.run();
+      }
+      else if(device==tnlCudaDevice )
+      {
+         typedef tnlCuda Device;
+         //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
+
+         tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
+         if(!solver.init(parameters))
+         {
+            cerr << "Solver failed to initialize." << endl;
+            return EXIT_FAILURE;
+         }
+         cout << "-------------------------------------------------------------" << endl;
+         cout << "Starting solver loop..." << endl;
+         solver.run();
+      }
+//   }
+   }
 
    time(&stop);
    cout << endl;
diff --git a/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h b/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h
index 76784f7a38..b79341e909 100644
--- a/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h
+++ b/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h
@@ -39,6 +39,7 @@ class parallelEikonalConfig
         config.addRequiredEntry < double > ( "initial-tau", " initial tau for solver" );
         config.addEntry < double > ( "cfl-condition", " CFL condition", 0.0 );
         config.addEntry < int > ( "subgrid-size", "Subgrid size.", 16 );
+        config.addRequiredEntry < int > ( "dim", "Dimension of problem.");
    }
 };
 
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
index aaa73cfb33..9912bed9a8 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
@@ -21,7 +21,6 @@
 #include "tnlParallelEikonalSolver.h"
 #include <core/mfilename.h>
 
-
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
 {
@@ -1044,7 +1043,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device>
 __device__
 void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
 {
-   int j = threadIdx.x + threadIdx.y * blockDim.x;
+// int j = threadIdx.x + threadIdx.y * blockDim.x;
    int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols
             + (blockIdx.x) * caller->n
             + threadIdx.y * caller->n*caller->gridCols
@@ -1166,7 +1165,6 @@ void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
    __syncthreads();
 
//  if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
    tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
    tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
    Entity.setCoordinates(tnlStaticVector<2,int>(i,j));
@@ -1312,20 +1310,8 @@ void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>
       subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1);
       boundary_index = 1;
    }
-// if(threadIdx.y == 0 && (blockIdx.y != 0) && (cudaSolver->currentStep & 1))
-// {
-//    u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
-//    subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x);
-//    boundary_index = 3;
-// }
-// if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1) && (cudaSolver->currentStep & 1))
-// {
-//    u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
-//    subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x);
-//    boundary_index = 0;
-// }
-// __threadfence();
+
+   __threadfence();
    if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
    {
       cudaSolver->unusedCell_cuda[gid] = 0;
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
index 5743dffd1e..70bef8e4e8 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
@@ -151,9 +151,9 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 
    for(int i = 0; i < this->subgridValues.getSize(); i++)
    {
-      if(! tmp[i].setSize(this->n * this->n))
+      if(! tmp[i].setSize(this->n * this->n*this->n))
         cout << "Could not allocate tmp["<< i <<"] array." << endl;
-         tmp[i] = getSubgrid(i);
+      tmp[i] = getSubgrid(i);
       containsCurve = false;
 
       for(int j = 0; j < tmp[i].getSize(); j++)
@@ -825,13 +825,13 @@ __device__
 void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
 {
    //int j = threadIdx.x + threadIdx.y * blockDim.x;
-   int th = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows
-          + (blockIdx.y) * caller->n*caller->n*caller->gridCols
-          + (blockIdx.x) * caller->n
-          + threadIdx.y * caller->n*caller->gridCols
+   int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows
+             + (blockIdx.y) * this->n*this->n*this->gridCols
+             + (blockIdx.x) * this->n
+             + threadIdx.y * this->n*this->gridCols
              + threadIdx.x;
    //printf("i= %d,j= %d,th= %d\n",i,j,th);
-   *a = caller->work_u_cuda[th];
+   *a = caller->work_u_cuda[index];
    //printf("Hi %f \n", *a);
    //return ret;
 }
@@ -842,11 +842,11 @@ __device__
 void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
 {
// int j = threadIdx.x + threadIdx.y * blockDim.x;
-   int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows
-             + (blockIdx.y) * caller->n*caller->n*caller->gridCols
-             + (blockIdx.x) * caller->n
-             + threadIdx.y * caller->n*caller->gridCols
-             + threadIdx.x;
+   int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows
+             + (blockIdx.y) * this->n*this->n*this->gridCols
+             + (blockIdx.x) * this->n
+             + threadIdx.y * this->n*this->gridCols
+             + threadIdx.x;
 
    if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) )
    {
@@ -899,7 +899,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
    int i = threadIdx.x;
    int j = threadIdx.y;
    int k = threadIdx.z;
-   int l = threadIdx.y * blockDim.x + threadIdx.x + blockDim.x*blockDim.y*threadIdx.z;
+   int l = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z*blockDim.x*blockDim.y;
    bool computeFU = !((i == 0 && (boundaryCondition & 4)) or
             (i == blockDim.x - 1 && (boundaryCondition & 2)) or
             (j == 0 && (boundaryCondition & 8)) or
@@ -994,7 +994,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
//  }
    double finalTime = this->stopTime;
    __syncthreads();
-   if( time + currentTau > finalTime ) currentTau = finalTime - time;
+   //if( time + currentTau > finalTime ) currentTau = finalTime - time;
 
    tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh);
    tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
@@ -1013,6 +1013,8 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
    if(abs(fu) > 0.0)
       sharedTau[l]=abs(cfl/fu);
 
+   if(u[l]*fu < 0.0 && abs(fu*sharedTau[l]) >abs(u[l])) sharedTau[l] = 0.5*abs(u[l]/fu) + this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >();
+
    if(l == 0)
    {
       if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()) sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >();
@@ -1021,9 +1023,9 @@
 
    if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time;
 
-   if((l < 256)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]);
+   if(l < 256) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]);
    __syncthreads();
-   if((l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]);
+   if(l < 128) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]);
    __syncthreads();
    if(l < 64) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]);
    __syncthreads();
@@ -1051,9 +1053,9 @@ int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::g
 {
    int j = i % (this->gridCols*this->gridRows*this->n*this->n);
 
-   return ( (i / (this->gridCols*this->gridRows*this->n*this->n))/this->n
+   return ( (i / (this->gridCols*this->gridRows*this->n*this->n*this->n))*this->gridCols*this->gridRows
           + (j / (this->gridCols*this->n*this->n))*this->gridCols
-          + (i % (this->gridCols*this->n))/this->n);
+          + (j % (this->gridCols*this->n))/this->n);
 }
 
@@ -1113,7 +1115,6 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>
 
    if(threadIdx.x+threadIdx.y+threadIdx.z == 0)
    {
-//    printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x);
      subgridValue = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y);
      boundary[0] = 0;
      boundary[1] = 0;
@@ -1162,7 +1163,7 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>
    if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/)
    {
      u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
-     subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y-1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y);
+     subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y - 1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y);
      boundary_index = 3;
    }
    if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/)
@@ -1186,13 +1187,13 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>
    if(threadIdx.z == 0 && (blockIdx.z != 0)/* && (cudaSolver->currentStep & 1)*/)
    {
      u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x*blockDim.y*gridDim.y];
-     subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z-1)*gridDim.x*gridDim.y);
+     subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z - 1)*gridDim.x*gridDim.y);
      boundary_index = 5;
    }
    if(threadIdx.z == blockDim.z - 1 && (blockIdx.z != gridDim.z - 1)/* && (cudaSolver->currentStep & 1)*/)
    {
      u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x*blockDim.y*gridDim.y];
-     subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z+1)*gridDim.x*gridDim.y);
+     subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z + 1)*gridDim.x*gridDim.y);
      boundary_index = 4;
    }
    if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
@@ -1218,7 +1219,12 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>
                        4 * boundary[2] +
                        8 * boundary[3] +
                        16 * boundary[4] +
-                       32 * boundary[5]);
+                       32 * boundary[5] );
+      if(blockIdx.x+blockIdx.y+blockIdx.z == 0)
+      {
+         cudaSolver->currentStep = cudaSolver->currentStep + 1;
+         *(cudaSolver->runcuda) = 0;
+      }
    }
 }
 
@@ -1228,20 +1234,10 @@ template <typename SchemeHost, typename SchemeDevice, typename Device>
 __global__
 void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver)
 {
-   if(blockIdx.x+blockIdx.y+blockIdx.z == 0)
-   {
-      cudaSolver->currentStep = cudaSolver->currentStep + 1;
-      *(cudaSolver->runcuda) = 0;
-   }
-
    int stepValue = cudaSolver->currentStep + 4;
    if( cudaSolver->getSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
-   {
      cudaSolver->setSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x, stepValue);
-   }
-// printf("%d\n",cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x)):
 
    atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x));
 }
@@ -1296,8 +1292,8 @@ void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device,
 
    extern __shared__ double u[];
 
-   int i = blockIdx.z*gridDim.y*gridDim.y + blockIdx.y * gridDim.x + blockIdx.x;
-   int l = threadIdx.z*blockDim.x*blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
+   int i = blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x;
+   int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
 
    __shared__ int containsCurve;
    if(l == 0)
@@ -1336,11 +1332,11 @@ __global__
 void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller)
 {
    extern __shared__ double u[];
-   int i = blockIdx.z*gridDim.y*gridDim.y + blockIdx.y * gridDim.x + blockIdx.x;
-   int l = threadIdx.z*blockDim.x*blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
+   int i = blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x;
+   int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
    int bound = caller->getBoundaryConditionCUDA3D(i);
 
-   if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > 0)
+   if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > -10)
    {
      caller->getSubgridCUDA3D(i,caller, &u[l]);
@@ -1425,26 +1421,27 @@ void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, doub
        __syncthreads();
      }
    }
+                         /* 1 2 4 8 16 32 */
-   if( ((bound & 19 )))
+   if( ((bound & 19 )))  /* 1 1 0 0 1 0 */
    {
      caller->runSubgridCUDA3D(19,u,i);
      caller->updateSubgridCUDA3D(i,caller, &u[l]);
      __syncthreads();
    }
-   if( ((bound & 21 )))
+   if( ((bound & 21 )))  /* 1 0 1 0 1 0 */
    {
      caller->runSubgridCUDA3D(21,u,i);
      caller->updateSubgridCUDA3D(i,caller, &u[l]);
      __syncthreads();
    }
-   if( ((bound & 26 )))
+   if( ((bound & 26 )))  /* 0 1 0 1 1 0 */
    {
      caller->runSubgridCUDA3D(26,u,i);
      caller->updateSubgridCUDA3D(i,caller, &u[l]);
      __syncthreads();
    }
-   if( (bound & 28 ))
+   if( (bound & 28 ))    /* 0 0 1 1 1 0 */
    {
      caller->runSubgridCUDA3D(28,u,i);
      caller->updateSubgridCUDA3D(i,caller, &u[l]);
@@ -1453,25 +1450,25 @@ void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, doub
 
-   if( ((bound & 35 )))
+   if( ((bound & 35 )))  /* 1 1 0 0 0 1 */
    {
      caller->runSubgridCUDA3D(35,u,i);
     caller->updateSubgridCUDA3D(i,caller, &u[l]);
     __syncthreads();
   }
-   if( ((bound & 37 )))
+   if( ((bound & 37 )))  /* 1 0 1 0 0 1 */
   {
     caller->runSubgridCUDA3D(37,u,i);
     caller->updateSubgridCUDA3D(i,caller, &u[l]);
     __syncthreads();
   }
-   if( ((bound & 42 )))
+   if( ((bound & 42 )))  /* 0 1 0 1 0 1 */
   {
     caller->runSubgridCUDA3D(42,u,i);
     caller->updateSubgridCUDA3D(i,caller, &u[l]);
     __syncthreads();
   }
-   if( (bound & 44 ))
+   if( (bound & 44 ))    /* 0 0 1 1 0 1 */
   {
     caller->runSubgridCUDA3D(44,u,i);
     caller->updateSubgridCUDA3D(i,caller, &u[l]);
diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h
index ca818114ec..20063523af 100644
--- a/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h
+++ b/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h
@@ -53,13 +53,13 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Rea
 {
    if(x > eps)
       return 1.0;
-   else if (x < -eps)
+   if (x < -eps)
      return (-1.0);
 
-   if ( eps == 0.0)
+   if ( x == 0.0)
      return 0.0;
 
-   return sin((M_PI*x)/(2.0*eps));
+   return sin((M_PI/2.0)*(x/eps));
 }
 
@@ -90,7 +90,7 @@ bool parallelGodunovEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Rea
    epsilon = parameters. getParameter< double >( "epsilon" );
 
    if(epsilon != 0.0)
-      epsilon *=sqrt( hx*hx + hy*hy +hz*hz );
+      epsilon *=sqrt( hx*hx + hy*hy + hz*hz );
 
//   dofVector. setSize( this->mesh.getDofs() );
 
@@ -220,9 +220,9 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
 
       yb = negativePart(yb);
 
-      zf = positivePart(yf);
+      yf = positivePart(yf);
 
-      yb = negativePart(zb);
+      zb = negativePart(zb);
 
       zf = positivePart(zf);
    }
@@ -271,28 +271,13 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
                  const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities ) const
 {
-
-/*
-   if ( ((coordinates.x() == 0 && (boundaryCondition & 4)) or
-         (coordinates.x() == mesh.getDimensions().x() - 1 && (boundaryCondition & 2)) or
-         (coordinates.y() == 0 && (boundaryCondition & 8)) or
-         (coordinates.y() == mesh.getDimensions().y() - 1 && (boundaryCondition & 1)))
-      )
-   {
-      return 0.0;
-   }
-
-*/
-   //RealType acc = hx*hy*hx*hy;
-
    RealType signui;
-   signui = sign(u[cellIndex],this->epsilon);
+   if(boundaryCondition == 0)
+      signui = sign(u[cellIndex], this->epsilon);
+   else
+      signui = Sign(u[cellIndex]);
 
-#ifdef HAVE_CUDA
-   //printf("%d : %d ;;;; %d : %d , %f \n",threadIdx.x, mesh.getDimensions().x() , threadIdx.y,mesh.getDimensions().y(), epsilon );
-#endif
-
    RealType xb = u[cellIndex];
    RealType xf = -u[cellIndex];
    RealType yb = u[cellIndex];
@@ -300,6 +285,8 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
    RealType zb = u[cellIndex];
    RealType zf = -u[cellIndex];
    RealType a,b,c,d;
+// if(threadIdx.x+threadIdx.y+threadIdx.z == 0)
+//    printf("x = %d, y = %d, z = %d\n",mesh.getDimensions().x() - 1,mesh.getDimensions().y() - 1,mesh.getDimensions().z() - 1);
 
 
    if(coordinates.x() == mesh.getDimensions().x() - 1)
@@ -363,9 +350,9 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
 
       yb = negativePart(yb);
 
-      zf = positivePart(yf);
+      yf = positivePart(yf);
 
-      yb = negativePart(zb);
+      zb = negativePart(zb);
 
       zf = positivePart(zf);
    }
@@ -386,11 +373,13 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
    else
       c = zf;
 
-   d =(1.0 - sqrt(a*a+b*b+c*c)*ihx );
+//   d =(1.0 - sqrt(a*a+b*b+c*c)*ihx );
 
-   if(Sign(d) > 0.0 )
-      return Sign(u[cellIndex])*d;
-   else
+   d = 1.0 - sqrt(xf*xf + xb*xb + yf*yf + yb*yb + zf*zf + zb*zb)*ihx; /*upwind*/
+
+//   if(Sign(d) > 0.0 )
+//      return Sign(u[cellIndex])*d;
+//   else
       return signui*d;
 }
-- 
GitLab
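
Two reference sketches related to the changes above. Both are illustrative code, not part of the patch; names flagged below are invented for the example.

First, the smoothed sign used by the 3D Godunov scheme, as it reads after the fix in parallelGodunovEikonal3D_impl.h: exactly +1/-1 outside the eps-band, 0 at x == 0, and a smooth sine ramp in between. sign_eps is an illustrative name for the class's sign( x, eps ) member:

   #include <cmath>

   // Smoothed signum; eps is the smoothing half-width.
   double sign_eps( double x, double eps )
   {
      if( x > eps )  return  1.0;
      if( x < -eps ) return -1.0;
      if( x == 0.0 ) return  0.0;   // exact zero maps to zero
      // smooth ramp on (-eps, eps)
      return std::sin( ( M_PI / 2.0 ) * ( x / eps ) );
   }

Second, the time-step selection in runSubgridCUDA3D: each thread proposes a CFL-limited tau, then a shared-memory tree reduction leaves the block-wide minimum in sharedTau[0]. Below is a minimal self-contained sketch of the same pattern, assuming exactly 512 threads per block (the unrolled reduction above starts folding at 256, i.e. blockDim.x*blockDim.y*blockDim.z == 512); minTauKernel and tauPerThread are illustrative names:

   // One block reduces 512 proposed time steps to their minimum.
   __global__ void minTauKernel( const double* tauPerThread, double* tauOut )
   {
      __shared__ double sharedTau[ 512 ];
      const int l = threadIdx.x;                     // flat in-block index
      sharedTau[ l ] = tauPerThread[ blockIdx.x * blockDim.x + l ];
      __syncthreads();

      // fold the upper half onto the lower half until one value remains
      for( int s = 256; s > 0; s >>= 1 )
      {
         if( l < s )
            sharedTau[ l ] = fmin( sharedTau[ l ], sharedTau[ l + s ] );
         __syncthreads();
      }
      if( l == 0 )
         tauOut[ blockIdx.x ] = sharedTau[ 0 ];     // block-wide minimum
   }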