From 67e1ee695ea885d387c8e50d2a2ac4e3516f9959 Mon Sep 17 00:00:00 2001 From: Tomas Sobotik <sobotto4@fjfi.cvut.cz> Date: Sun, 20 Mar 2016 21:39:14 +0100 Subject: [PATCH] Merging parallel solver 5 --- .../tnlParallelEikonalSolver3D_impl.h | 258 ++---------------- 1 file changed, 24 insertions(+), 234 deletions(-) diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h index e2171e72dd..5743dffd1e 100644 --- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h @@ -210,7 +210,7 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: checkCudaDevice; synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); + cout << cudaGetErrorString(cudaDeviceSynchronize()) << endl; checkCudaDevice; synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); cudaDeviceSynchronize(); @@ -579,7 +579,6 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: for(int i = 0; i < levelSize; i++) { - this->unusedCell[i] = 1; int diff =(this->n*this->gridCols) - idealStretch ; int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; @@ -596,7 +595,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: for( int j = 0; j<this->n*this->gridLevels; j++) { - + this->unusedCell[i+j*levelSize] = 1; int l = j/this->n; if(j - idealStretch >= 0) @@ -604,7 +603,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: l+= j - idealStretch + 1; } - this->work_u[i+(j-l)*levelSize] = this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k]; + this->work_u[i+j*levelSize] = this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k]; } } @@ -636,7 +635,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: { int l = j/this->n; if(j - idealStretch < 0) - this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+(j-l)*levelSize]; + this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+j*levelSize]; } } @@ -689,182 +688,6 @@ tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSu fu.setLike(u); fu.setValue( 0.0 ); -/* - * Insert Euler-Solver Here - */ - - /**/ - - /*for(int i = 0; i < u.getSize(); i++) - { - int x = this->subMesh.getCellCoordinates(i).x(); - int y = this->subMesh.getCellCoordinates(i).y(); - - if(x == 0 && (boundaryCondition & 4) && y ==0) - { - if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1, 0 >() > 1.0) - { - //cout << "x = 0; y = 0" << endl; - u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1, 0 >(); - } - } - else if(x == 0 && (boundaryCondition & 4) && y == subMesh.getDimensions().y() - 1) - { - if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1, 0 >() > 1.0) - { - //cout << "x = 0; y = n" << endl; - u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1, 0 >(); - } - } - - - else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y ==0) - { - if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1, 0 >() > 1.0) - { - //cout << "x = n; y = 0" << endl; - u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1, 0 >(); - } - } - else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y == subMesh.getDimensions().y() - 1) - { - if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1, 0 >() > 1.0) - { - //cout << "x = n; y = n" << endl; - u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1, 0 >(); - } - } - - - else if(y == 0 && (boundaryCondition & 8) && x ==0) - { - if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0, 0 >() > 1.0) - { - //cout << "y = 0; x = 0" << endl; - u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - } - } - else if(y == 0 && (boundaryCondition & 8) && x == subMesh.getDimensions().x() - 1) - { - if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0, 0 >() > 1.0) - { - //cout << "y = 0; x = n" << endl; - u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - } - } - - - else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x ==0) - { - if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0, 0 >() > 1.0) { - //cout << "y = n; x = 0" << endl; - u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - } - } - else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x == subMesh.getDimensions().x() - 1) - { - if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0, 0 >() > 1.0) - { - //cout << "y = n; x = n" << endl; - u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - } - } - }*/ - - /**/ - - -/* bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - } - - - if(tmp) - {} - else if(boundaryCondition == 4) - { - int i; - for(i = 0; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) - { - int j; - for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - else if(boundaryCondition == 8) - { - int i; - for(i = 0; i < subMesh.getDimensions().x() - 1; i=subMesh.getCellXSuccessor(i)) - { - int j; - for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - - } - else if(boundaryCondition == 2) - { - int i; - for(i = subMesh.getDimensions().x() - 1; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) - { - int j; - for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - else if(boundaryCondition == 1) - { - int i; - for(i = (subMesh.getDimensions().y() - 1)*subMesh.getDimensions().x(); i < u.getSize() - 1; i=subMesh.getCellXSuccessor(i)) - { - int j; - for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } -*/ - /**/ - - bool tmp = false; for(int i = 0; i < u.getSize(); i++) @@ -915,44 +738,6 @@ tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSu u[i*this->n + j] = value;// u[j]; } -/* - - else if(boundaryCondition == 5) - { - for(int i = 0; i < this->n - 1; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 10) - { - for(int i = 1; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 3) - { - for(int j = 0; j < this->n - 1; j++) - for(int i = 0;i < this->n - 1; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 12) - { - for(int j = 1; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } -*/ - - - /**/ - - /*if (u.max() > 0.0) - this->stopTime *=(double) this->gridCols;*/ - double time = 0.0; double currentTau = this->tau0; @@ -1132,8 +917,6 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: } __syncthreads(); - /*if(!tmp && (u[0]*u[l] <= 0.0)) - atomicMax( &tmp, 1);*/ __syncthreads(); if(tmp !=1) @@ -1205,7 +988,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: __shared__ double currentTau; double cfl = this->cflCondition; double fu = 0.0; -// if(threadIdx.x * threadIdx.y == 0) +// if(threadIdx.x * threadIdx.y * threadIdx.z == 0) // { // currentTau = this->tau0; // } @@ -1213,19 +996,22 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: __syncthreads(); if( time + currentTau > finalTime ) currentTau = finalTime - time; - tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity); - Entity.setCoordinates(tnlStaticVector<3,int>(i,j,k)); - Entity.refresh(); - neighbourEntities.refresh(subMesh,Entity.getIndex()); + tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh); + tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity); + Entity.setCoordinates(tnlStaticVector<3,int>(i,j,k)); + Entity.refresh(); + neighbourEntities.refresh(subMesh,Entity.getIndex()); while( time < finalTime ) { + sharedTau[l]=finalTime; + if(computeFU) - fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<3,int>(i,j,k)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighbourEntities); + fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighbourEntities); - sharedTau[l]=cfl/abs(fu); + if(abs(fu) > 0.0) + sharedTau[l]=abs(cfl/fu); if(l == 0) { @@ -1235,9 +1021,9 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - if((blockDim.x == 8) && (l < 256)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]); + if((l < 256)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]); __syncthreads(); - if((blockDim.x == 8) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); + if((l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); __syncthreads(); if(l < 64) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); __syncthreads(); @@ -1249,6 +1035,8 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); __syncthreads(); +// if(abs(fu) < 10000.0) +// printf("bla"); u[l] += currentTau * fu; time += currentTau; } @@ -1325,7 +1113,7 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int> if(threadIdx.x+threadIdx.y+threadIdx.z == 0) { - printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); +// printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); subgridValue = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y); boundary[0] = 0; boundary[1] = 0; @@ -1414,6 +1202,7 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int> atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; } + __threadfence(); } __syncthreads(); @@ -1439,7 +1228,7 @@ template <typename SchemeHost, typename SchemeDevice, typename Device> __global__ void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) { - if(blockIdx.x+blockIdx.y+blockIdx.z ==0) + if(blockIdx.x+blockIdx.y+blockIdx.z == 0) { cudaSolver->currentStep = cudaSolver->currentStep + 1; *(cudaSolver->runcuda) = 0; @@ -1452,6 +1241,7 @@ void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Dev cudaSolver->setSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x, stepValue); } +// printf("%d\n",cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x)): atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x)); } @@ -1616,7 +1406,7 @@ void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, doub caller->updateSubgridCUDA3D(i,caller, &u[l]); __syncthreads(); } - if( (bound == 4)) + if((bound == 4)) { caller->runSubgridCUDA3D(4,u,i); caller->updateSubgridCUDA3D(i,caller, &u[l]); -- GitLab