Loading examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h +49 −146 Original line number Diff line number Diff line Loading @@ -186,7 +186,7 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in dim3 numBlocks(this->gridCols,this->gridRows); cudaDeviceSynchronize(); checkCudaDevice; initRunCUDA<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); initRunCUDA<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*sizeof(double)>>>(this->cudaSolver); cudaDeviceSynchronize(); // cout << "post 1 kernel" << endl; Loading Loading @@ -345,11 +345,11 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru //cout << "a" << endl; cudaDeviceSynchronize(); checkCudaDevice; //start = std::clock(); runCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); start = std::clock(); runCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*sizeof(double)>>>(this->cudaSolver); //cout << "a" << endl; cudaDeviceSynchronize(); time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); //start = std::clock(); synchronizeCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); Loading @@ -368,7 +368,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); //cout << "in kernel loop" << run_host << endl; } //cout << "Solving time was: " << time_diff << endl; cout << "Solving time was: " << time_diff << endl; //cout << "b" << endl; //double* tmpu; Loading Loading @@ -1080,30 +1080,30 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru { __shared__ int tmp; double* sharedTau = &u[blockDim.x*blockDim.y]; double* sharedRes = &sharedTau[blockDim.x*blockDim.y]; //double tmpRes = 0.0; volatile double* sharedTau = &u[blockDim.x*blockDim.y]; int i = threadIdx.x; int j = threadIdx.y; int l = threadIdx.y * blockDim.x + threadIdx.x; bool computeFU = !((i == 0 && (boundaryCondition & 4)) or (i == blockDim.x - 1 && (boundaryCondition & 2)) or (j == 0 && (boundaryCondition & 8)) or (j == blockDim.y - 1 && (boundaryCondition & 1))); if(l == 0) { tmp = 0; /*for(int o = 0; o < this->n * this->n; o++) printf("%d : %f\n", o, u[o]);*/ if(this->getSubgridValueCUDA(subGridID) != this->currentStep + 4) tmp = 1; } __syncthreads(); if(u[0]*u[l] <= 0.0) atomicMax( &tmp, 1); //__syncthreads(); //printf("tmp = %d", tmp); __shared__ double value; if(l == 0) value = 0.0; __syncthreads(); Loading @@ -1113,164 +1113,65 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru value=Max(value,fabs(u[l])); __syncthreads(); } //__syncthreads(); //atomicMax(&value,fabs(u[l])) if(l == 0) value *= Sign(u[0]); __syncthreads(); if(tmp == 1) {} //north - 1, east - 2, west - 4, south - 8 else if(boundaryCondition == 4) { if(j > 0) u[i*this->n + j] = value; } else if(boundaryCondition == 2) { if(j < this->n - 1) u[i*this->n + j] = value; } else if(boundaryCondition == 1) { if(i < this->n - 1) u[i*this->n + j] = value; } else if(boundaryCondition == 8) { if(i > 0) u[i*this->n + j] = value; } //__syncthreads(); if(tmp !=1 && computeFU) u[l] = value; double time = 0.0; __shared__ double currentTau; __shared__ double maxResidue; double cfl = this->cflCondition; double fu = 0.0; if(threadIdx.x * threadIdx.y == 0) { currentTau = this->tau0; maxResidue = 0.0;//10.0 * this->subMesh.getHx(); } double finalTime = this->stopTime; __syncthreads(); if( time + currentTau > finalTime ) currentTau = finalTime - time; __syncthreads(); //printf("%d : %f\n", l, u[l]); while( time < finalTime ) { /**** * Compute the RHS */ //__syncthreads(); fu = schemeHost.getValueDev( this->subMesh, l, this->subMesh.getCellCoordinates(l), u, time, boundaryCondition); //__syncthreads(); //printf("%d : %f\n", l, fu); if(computeFU) fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition); //atomicMax(&maxResidue,fabs(fu));//maxResidue = fu. absMax(); // sharedRes[l]=fabs(fu); /* if(l == 0) maxResidue = 0.0; __syncthreads(); //start reduction for(int o = 0; o < blockDim.x*blockDim.y; o++) { if(l == o) maxResidue=Max(maxResidue,sharedRes[l]); __syncthreads(); }*/ //__syncthreads(); //end reduction sharedTau[l]=this -> cflCondition/fabs(fu);//sharedRes[l]; if((u[l]+sharedTau[l] * fu)*u[l] < 0.0) sharedTau[l] = fabs(u[l]/(2.0*fu)); sharedTau[l]=cfl/fabs(fu); if(l == 0) if(sharedTau[l] > 1.0 * this->subMesh.getHx()) sharedTau[l] = 1.0 * this->subMesh.getHx(); if(sharedTau[0] > 1.0 * this->subMesh.getHx()) sharedTau[0] = 1.0 * this->subMesh.getHx(); if(l == 1) if(l == blockDim.x*blockDim.y - 1) if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; __syncthreads(); // __syncthreads(); /*for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1) { if( l < s ) sharedTau[l] = Min(sharedTau[l],sharedTau[l+s]) ; __syncthreads(); } if(l==0) currentTau=sharedTau[l]; __syncthreads();*/ for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1) { //if( l < s ) // sharedRes[l] = Max(sharedRes[l],sharedRes[l+s]); if(l >= blockDim.x*blockDim.y - s) sharedTau[l] = Min(sharedTau[l],sharedTau[l-s]); if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); __syncthreads(); } if(l==0) { //maxResidue=sharedRes[l]; currentTau=sharedTau[blockDim.x*blockDim.y - 1]; /*if( this -> cflCondition * maxResidue != 0.0) currentTau = Min(this -> cflCondition / maxResidue, currentTau);*/ } if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); __syncthreads(); if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); //__syncthreads(); // //double tau2 = finalTime; //start reduction //atomicMin(¤tTau, tau2); /* if(l==0) currentTau=sharedTau[l]; __syncthreads(); for(int o = 0; o < blockDim.x*blockDim.y; o++) { if(l == o) currentTau=Min(currentTau,sharedTau[l]); if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); //__syncthreads(); if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); // __syncthreads(); if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); //__syncthreads(); if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); //__syncthreads(); if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); __syncthreads(); }*/ //end reduction // //__syncthreads(); u[l] += currentTau * fu; //if(l==0) // printf("ct %f\n",currentTau); time += currentTau; __syncthreads(); } //printf("%d : %f\n", l, u[l]); } Loading Loading @@ -1697,10 +1598,11 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: if(caller->getSubgridValueCUDA(i) != INT_MAX) { caller->getSubgridCUDA(i,caller, &u[l]); u[2*blockDim.x*blockDim.y +l] = u[l]; int bound = caller->getBoundaryConditionCUDA(i); //if(l == 0) //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA(i)); //if(caller->getSubgridValueCUDA(i) == caller->currentStep+4) //{ if(bound & 1) { caller->runSubgridCUDA(1,u,i); Loading Loading @@ -1741,10 +1643,11 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } //} if( ((bound & 2) )) if( ((bound & 2) || (bound & 1) )) { caller->runSubgridCUDA(3,u,i); __syncthreads(); Loading @@ -1754,7 +1657,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } if( ((bound & 4) )) if( ((bound & 4) || (bound & 1) )) { caller->runSubgridCUDA(5,u,i); __syncthreads(); Loading @@ -1764,7 +1667,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } if( ((bound & 2) )) if( ((bound & 2) || (bound & 8) )) { caller->runSubgridCUDA(10,u,i); __syncthreads(); Loading @@ -1774,7 +1677,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } if( (bound & 4) ) if( (bound & 4) || (bound & 8) ) { caller->runSubgridCUDA(12,u,i); __syncthreads(); Loading src/operators/godunov-eikonal/parallelGodunovEikonal.h +2 −2 Original line number Diff line number Diff line Loading @@ -171,8 +171,8 @@ protected: DofVectorType dofVector; RealType hx; RealType hy; RealType hx, ihx; RealType hy, ihy; RealType epsilon; Loading src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h +30 −44 Original line number Diff line number Diff line Loading @@ -45,7 +45,7 @@ template< typename MeshReal, Real parallelGodunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const { if(arg < 0.0) return arg; return -arg; return 0.0; } Loading @@ -62,13 +62,13 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Rea { if(x > eps) return 1.0; else if (x < -eps) if (x < -eps) return (-1.0); if ( eps == 0.0) return 0.0; return sin((M_PI*x)/(2.0*eps)); return sin(/*(M_PI*x)/(2.0*eps) */(M_PI/2.0)*(x/eps)); } Loading Loading @@ -98,7 +98,9 @@ bool parallelGodunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Rea hx = originalMesh.getHx(); ihx = 1.0/hx; hy = originalMesh.getHy(); ihy = 1.0/hy; epsilon = parameters. getParameter< double >( "epsilon" ); Loading Loading @@ -207,12 +209,12 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re else yb = positivePart((u[cellIndex] - u[mesh.template getCellNextToCell<0,-1>( cellIndex )])/hy); if(xb + xf > 0.0) if(xb - xf > 0.0) xf = 0.0; else xb = 0.0; if(yb + yf > 0.0) if(yb - yf > 0.0) yf = 0.0; else yb = 0.0; Loading Loading @@ -262,15 +264,15 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb = negativePart((u[cellIndex] - u[mesh.template getCellNextToCell<0,-1>( cellIndex )])/hy); if(xb + xf > 0.0) xb = 0.0; else if(xb - xf > 0.0) xf = 0.0; if(yb + yf > 0.0) yb = 0.0; else xb = 0.0; if(yb - yf > 0.0) yf = 0.0; else yb = 0.0; nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb ); Loading Loading @@ -310,7 +312,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re const IndexType boundaryCondition) const { /* if ( ((coordinates.x() == 0 && (boundaryCondition & 4)) or (coordinates.x() == mesh.getDimensions().x() - 1 && (boundaryCondition & 2)) or (coordinates.y() == 0 && (boundaryCondition & 8)) or Loading @@ -320,10 +322,10 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re return 0.0; } */ //RealType acc = hx*hy*hx*hy; RealType nabla, signui; RealType signui; signui = sign(u[cellIndex],epsilon); #ifdef HAVE_CUDA Loading @@ -334,6 +336,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re RealType xf = -u[cellIndex]; RealType yb = u[cellIndex]; RealType yf = -u[cellIndex]; RealType a,b; if(coordinates.x() == mesh.getDimensions().x() - 1) Loading @@ -357,10 +360,10 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb -= u[mesh.template getCellNextToCell<0,-1>( cellIndex )]; xb /= hx; xf /= hx; yb /= hy; yf /= hy; //xb *= ihx; //xf *= ihx; // yb *= ihy; //yf *= ihy; if(signui > 0.0) { Loading @@ -372,16 +375,6 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb = positivePart(yb); if(xb + xf > 0.0) xf = 0.0; else xb = 0.0; if(yb + yf > 0.0) yf = 0.0; else yb = 0.0; } else if(signui < 0.0) { Loading @@ -393,27 +386,20 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb = negativePart(yb); yf = positivePart(yf); if(xb + xf > 0.0) xb = 0.0; else xf = 0.0; if(yb + yf > 0.0) yb = 0.0; else yf = 0.0; } if(xb - xf > 0.0) a = xb; else a = xf; nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb ); return signui*(1.0 - nabla); if(yb - yf > 0.0) b = yb; else b = yf; return signui*(1.0 - sqrt(a*a+b*b)*ihx ); } Loading Loading
examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h +49 −146 Original line number Diff line number Diff line Loading @@ -186,7 +186,7 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in dim3 numBlocks(this->gridCols,this->gridRows); cudaDeviceSynchronize(); checkCudaDevice; initRunCUDA<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); initRunCUDA<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*sizeof(double)>>>(this->cudaSolver); cudaDeviceSynchronize(); // cout << "post 1 kernel" << endl; Loading Loading @@ -345,11 +345,11 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru //cout << "a" << endl; cudaDeviceSynchronize(); checkCudaDevice; //start = std::clock(); runCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); start = std::clock(); runCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*sizeof(double)>>>(this->cudaSolver); //cout << "a" << endl; cudaDeviceSynchronize(); time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); //start = std::clock(); synchronizeCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); Loading @@ -368,7 +368,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); //cout << "in kernel loop" << run_host << endl; } //cout << "Solving time was: " << time_diff << endl; cout << "Solving time was: " << time_diff << endl; //cout << "b" << endl; //double* tmpu; Loading Loading @@ -1080,30 +1080,30 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru { __shared__ int tmp; double* sharedTau = &u[blockDim.x*blockDim.y]; double* sharedRes = &sharedTau[blockDim.x*blockDim.y]; //double tmpRes = 0.0; volatile double* sharedTau = &u[blockDim.x*blockDim.y]; int i = threadIdx.x; int j = threadIdx.y; int l = threadIdx.y * blockDim.x + threadIdx.x; bool computeFU = !((i == 0 && (boundaryCondition & 4)) or (i == blockDim.x - 1 && (boundaryCondition & 2)) or (j == 0 && (boundaryCondition & 8)) or (j == blockDim.y - 1 && (boundaryCondition & 1))); if(l == 0) { tmp = 0; /*for(int o = 0; o < this->n * this->n; o++) printf("%d : %f\n", o, u[o]);*/ if(this->getSubgridValueCUDA(subGridID) != this->currentStep + 4) tmp = 1; } __syncthreads(); if(u[0]*u[l] <= 0.0) atomicMax( &tmp, 1); //__syncthreads(); //printf("tmp = %d", tmp); __shared__ double value; if(l == 0) value = 0.0; __syncthreads(); Loading @@ -1113,164 +1113,65 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru value=Max(value,fabs(u[l])); __syncthreads(); } //__syncthreads(); //atomicMax(&value,fabs(u[l])) if(l == 0) value *= Sign(u[0]); __syncthreads(); if(tmp == 1) {} //north - 1, east - 2, west - 4, south - 8 else if(boundaryCondition == 4) { if(j > 0) u[i*this->n + j] = value; } else if(boundaryCondition == 2) { if(j < this->n - 1) u[i*this->n + j] = value; } else if(boundaryCondition == 1) { if(i < this->n - 1) u[i*this->n + j] = value; } else if(boundaryCondition == 8) { if(i > 0) u[i*this->n + j] = value; } //__syncthreads(); if(tmp !=1 && computeFU) u[l] = value; double time = 0.0; __shared__ double currentTau; __shared__ double maxResidue; double cfl = this->cflCondition; double fu = 0.0; if(threadIdx.x * threadIdx.y == 0) { currentTau = this->tau0; maxResidue = 0.0;//10.0 * this->subMesh.getHx(); } double finalTime = this->stopTime; __syncthreads(); if( time + currentTau > finalTime ) currentTau = finalTime - time; __syncthreads(); //printf("%d : %f\n", l, u[l]); while( time < finalTime ) { /**** * Compute the RHS */ //__syncthreads(); fu = schemeHost.getValueDev( this->subMesh, l, this->subMesh.getCellCoordinates(l), u, time, boundaryCondition); //__syncthreads(); //printf("%d : %f\n", l, fu); if(computeFU) fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition); //atomicMax(&maxResidue,fabs(fu));//maxResidue = fu. absMax(); // sharedRes[l]=fabs(fu); /* if(l == 0) maxResidue = 0.0; __syncthreads(); //start reduction for(int o = 0; o < blockDim.x*blockDim.y; o++) { if(l == o) maxResidue=Max(maxResidue,sharedRes[l]); __syncthreads(); }*/ //__syncthreads(); //end reduction sharedTau[l]=this -> cflCondition/fabs(fu);//sharedRes[l]; if((u[l]+sharedTau[l] * fu)*u[l] < 0.0) sharedTau[l] = fabs(u[l]/(2.0*fu)); sharedTau[l]=cfl/fabs(fu); if(l == 0) if(sharedTau[l] > 1.0 * this->subMesh.getHx()) sharedTau[l] = 1.0 * this->subMesh.getHx(); if(sharedTau[0] > 1.0 * this->subMesh.getHx()) sharedTau[0] = 1.0 * this->subMesh.getHx(); if(l == 1) if(l == blockDim.x*blockDim.y - 1) if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; __syncthreads(); // __syncthreads(); /*for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1) { if( l < s ) sharedTau[l] = Min(sharedTau[l],sharedTau[l+s]) ; __syncthreads(); } if(l==0) currentTau=sharedTau[l]; __syncthreads();*/ for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1) { //if( l < s ) // sharedRes[l] = Max(sharedRes[l],sharedRes[l+s]); if(l >= blockDim.x*blockDim.y - s) sharedTau[l] = Min(sharedTau[l],sharedTau[l-s]); if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); __syncthreads(); } if(l==0) { //maxResidue=sharedRes[l]; currentTau=sharedTau[blockDim.x*blockDim.y - 1]; /*if( this -> cflCondition * maxResidue != 0.0) currentTau = Min(this -> cflCondition / maxResidue, currentTau);*/ } if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); __syncthreads(); if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); //__syncthreads(); // //double tau2 = finalTime; //start reduction //atomicMin(¤tTau, tau2); /* if(l==0) currentTau=sharedTau[l]; __syncthreads(); for(int o = 0; o < blockDim.x*blockDim.y; o++) { if(l == o) currentTau=Min(currentTau,sharedTau[l]); if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); //__syncthreads(); if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); // __syncthreads(); if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); //__syncthreads(); if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); //__syncthreads(); if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); __syncthreads(); }*/ //end reduction // //__syncthreads(); u[l] += currentTau * fu; //if(l==0) // printf("ct %f\n",currentTau); time += currentTau; __syncthreads(); } //printf("%d : %f\n", l, u[l]); } Loading Loading @@ -1697,10 +1598,11 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: if(caller->getSubgridValueCUDA(i) != INT_MAX) { caller->getSubgridCUDA(i,caller, &u[l]); u[2*blockDim.x*blockDim.y +l] = u[l]; int bound = caller->getBoundaryConditionCUDA(i); //if(l == 0) //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA(i)); //if(caller->getSubgridValueCUDA(i) == caller->currentStep+4) //{ if(bound & 1) { caller->runSubgridCUDA(1,u,i); Loading Loading @@ -1741,10 +1643,11 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } //} if( ((bound & 2) )) if( ((bound & 2) || (bound & 1) )) { caller->runSubgridCUDA(3,u,i); __syncthreads(); Loading @@ -1754,7 +1657,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } if( ((bound & 4) )) if( ((bound & 4) || (bound & 1) )) { caller->runSubgridCUDA(5,u,i); __syncthreads(); Loading @@ -1764,7 +1667,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } if( ((bound & 2) )) if( ((bound & 2) || (bound & 8) )) { caller->runSubgridCUDA(10,u,i); __syncthreads(); Loading @@ -1774,7 +1677,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: caller->updateSubgridCUDA(i,caller, &u[l]); __syncthreads(); } if( (bound & 4) ) if( (bound & 4) || (bound & 8) ) { caller->runSubgridCUDA(12,u,i); __syncthreads(); Loading
src/operators/godunov-eikonal/parallelGodunovEikonal.h +2 −2 Original line number Diff line number Diff line Loading @@ -171,8 +171,8 @@ protected: DofVectorType dofVector; RealType hx; RealType hy; RealType hx, ihx; RealType hy, ihy; RealType epsilon; Loading
src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h +30 −44 Original line number Diff line number Diff line Loading @@ -45,7 +45,7 @@ template< typename MeshReal, Real parallelGodunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const { if(arg < 0.0) return arg; return -arg; return 0.0; } Loading @@ -62,13 +62,13 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Rea { if(x > eps) return 1.0; else if (x < -eps) if (x < -eps) return (-1.0); if ( eps == 0.0) return 0.0; return sin((M_PI*x)/(2.0*eps)); return sin(/*(M_PI*x)/(2.0*eps) */(M_PI/2.0)*(x/eps)); } Loading Loading @@ -98,7 +98,9 @@ bool parallelGodunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Rea hx = originalMesh.getHx(); ihx = 1.0/hx; hy = originalMesh.getHy(); ihy = 1.0/hy; epsilon = parameters. getParameter< double >( "epsilon" ); Loading Loading @@ -207,12 +209,12 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re else yb = positivePart((u[cellIndex] - u[mesh.template getCellNextToCell<0,-1>( cellIndex )])/hy); if(xb + xf > 0.0) if(xb - xf > 0.0) xf = 0.0; else xb = 0.0; if(yb + yf > 0.0) if(yb - yf > 0.0) yf = 0.0; else yb = 0.0; Loading Loading @@ -262,15 +264,15 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb = negativePart((u[cellIndex] - u[mesh.template getCellNextToCell<0,-1>( cellIndex )])/hy); if(xb + xf > 0.0) xb = 0.0; else if(xb - xf > 0.0) xf = 0.0; if(yb + yf > 0.0) yb = 0.0; else xb = 0.0; if(yb - yf > 0.0) yf = 0.0; else yb = 0.0; nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb ); Loading Loading @@ -310,7 +312,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re const IndexType boundaryCondition) const { /* if ( ((coordinates.x() == 0 && (boundaryCondition & 4)) or (coordinates.x() == mesh.getDimensions().x() - 1 && (boundaryCondition & 2)) or (coordinates.y() == 0 && (boundaryCondition & 8)) or Loading @@ -320,10 +322,10 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re return 0.0; } */ //RealType acc = hx*hy*hx*hy; RealType nabla, signui; RealType signui; signui = sign(u[cellIndex],epsilon); #ifdef HAVE_CUDA Loading @@ -334,6 +336,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re RealType xf = -u[cellIndex]; RealType yb = u[cellIndex]; RealType yf = -u[cellIndex]; RealType a,b; if(coordinates.x() == mesh.getDimensions().x() - 1) Loading @@ -357,10 +360,10 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb -= u[mesh.template getCellNextToCell<0,-1>( cellIndex )]; xb /= hx; xf /= hx; yb /= hy; yf /= hy; //xb *= ihx; //xf *= ihx; // yb *= ihy; //yf *= ihy; if(signui > 0.0) { Loading @@ -372,16 +375,6 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb = positivePart(yb); if(xb + xf > 0.0) xf = 0.0; else xb = 0.0; if(yb + yf > 0.0) yf = 0.0; else yb = 0.0; } else if(signui < 0.0) { Loading @@ -393,27 +386,20 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re yb = negativePart(yb); yf = positivePart(yf); if(xb + xf > 0.0) xb = 0.0; else xf = 0.0; if(yb + yf > 0.0) yb = 0.0; else yf = 0.0; } if(xb - xf > 0.0) a = xb; else a = xf; nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb ); return signui*(1.0 - nabla); if(yb - yf > 0.0) b = yb; else b = yf; return signui*(1.0 - sqrt(a*a+b*b)*ihx ); } Loading