Loading examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +15 −12 Original line number Diff line number Diff line Loading @@ -186,7 +186,7 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); cudaDeviceSynchronize(); checkCudaDevice; initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); cudaDeviceSynchronize(); // cout << "post 1 kernel" << endl; Loading Loading @@ -349,7 +349,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: cudaDeviceSynchronize(); checkCudaDevice; start = std::clock(); runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); //cout << "a" << endl; cudaDeviceSynchronize(); time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); Loading Loading @@ -895,7 +895,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: __shared__ double value; //double tmpRes = 0.0; volatile double* sharedTau = &u[blockDim.x*blockDim.y*blockDim.z]; volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z]; // volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z]; int i = threadIdx.x; int j = threadIdx.y; int k = threadIdx.z; Loading Loading @@ -1008,12 +1008,11 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: sharedTau[l]=finalTime; if(computeFU) { fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighbourEntities); if(abs(fu) > 0.0) sharedTau[l]=abs(cfl/fu); /* if(u[l]*fu < 0.0 && abs(fu*sharedTau[l]) >abs(u[l])) sharedTau[l] = 0.9*abs(u[l]/fu)/* + this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*/; } if(l == 0) { Loading Loading @@ -1042,8 +1041,10 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: // if(abs(fu) < 10000.0) // printf("bla"); if(computeFU) u[l] += currentTau * fu; time += currentTau; __syncthreads(); } Loading Loading @@ -1317,7 +1318,9 @@ void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, { caller->runSubgridCUDA3D(0,u,i); __syncthreads(); caller->insertSubgridCUDA3D(u[l],i); // caller->insertSubgridCUDA3D(u[l],i); caller->updateSubgridCUDA3D(i,caller, &u[l]); __syncthreads(); if(l == 0) caller->setSubgridValueCUDA3D(i, 4); Loading @@ -1339,7 +1342,7 @@ void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, doub int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; int bound = caller->getBoundaryConditionCUDA3D(i); if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > -10) if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > 0) { caller->getSubgridCUDA3D(i,caller, &u[l]); Loading src/functions/tnlTestFunction_impl.h +2 −2 Original line number Diff line number Diff line Loading @@ -76,8 +76,8 @@ configSetup( tnlConfigDescription& config, config.addEntryEnum( "sdf-sin-bumps" ); config.addEntryEnum( "sdf-sin-wave-sdf" ); config.addEntryEnum( "sdf-sin-bumps-sdf" ); config.addEntryEnum( "sdf-paraboloid" ); config.addEntryEnum( "sdf-paraboloid-sdf" ); config.addEntryEnum( "sdf-para" ); config.addEntryEnum( "sdf-para-sdf" ); config.addEntry < double >( prefix + "constant", "Value of the constant function.", 0.0 ); config.addEntry < double >( prefix + "wave-length", "Wave length of the sine based test functions.", 1.0 ); config.addEntry < double >( prefix + "wave-length-x", "Wave length of the sine based test functions.", 1.0 ); Loading src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h +15 −15 Original line number Diff line number Diff line Loading @@ -358,21 +358,21 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re } if(xb - xf > 0.0) a = xb; else a = xf; if(yb - yf > 0.0) b = yb; else b = yf; if(zb - zf > 0.0) c = zb; else c = zf; // if(xb - xf > 0.0) // a = xb; // else // a = xf; // // if(yb - yf > 0.0) // b = yb; // else // b = yf; // // if(zb - zf > 0.0) // c = zb; // else // c = zf; // // d = ( 1.0 - sqrt(a*a + b*b + c*c)*ihx ); d = 1.0 - sqrt(xf*xf + xb*xb + yf*yf + yb*yb + zf*zf + zb*zb)*ihx; /*upwind*/ Loading Loading
examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +15 −12 Original line number Diff line number Diff line Loading @@ -186,7 +186,7 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); cudaDeviceSynchronize(); checkCudaDevice; initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); cudaDeviceSynchronize(); // cout << "post 1 kernel" << endl; Loading Loading @@ -349,7 +349,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: cudaDeviceSynchronize(); checkCudaDevice; start = std::clock(); runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); //cout << "a" << endl; cudaDeviceSynchronize(); time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); Loading Loading @@ -895,7 +895,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: __shared__ double value; //double tmpRes = 0.0; volatile double* sharedTau = &u[blockDim.x*blockDim.y*blockDim.z]; volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z]; // volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z]; int i = threadIdx.x; int j = threadIdx.y; int k = threadIdx.z; Loading Loading @@ -1008,12 +1008,11 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: sharedTau[l]=finalTime; if(computeFU) { fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighbourEntities); if(abs(fu) > 0.0) sharedTau[l]=abs(cfl/fu); /* if(u[l]*fu < 0.0 && abs(fu*sharedTau[l]) >abs(u[l])) sharedTau[l] = 0.9*abs(u[l]/fu)/* + this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*/; } if(l == 0) { Loading Loading @@ -1042,8 +1041,10 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: // if(abs(fu) < 10000.0) // printf("bla"); if(computeFU) u[l] += currentTau * fu; time += currentTau; __syncthreads(); } Loading Loading @@ -1317,7 +1318,9 @@ void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, { caller->runSubgridCUDA3D(0,u,i); __syncthreads(); caller->insertSubgridCUDA3D(u[l],i); // caller->insertSubgridCUDA3D(u[l],i); caller->updateSubgridCUDA3D(i,caller, &u[l]); __syncthreads(); if(l == 0) caller->setSubgridValueCUDA3D(i, 4); Loading @@ -1339,7 +1342,7 @@ void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, doub int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; int bound = caller->getBoundaryConditionCUDA3D(i); if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > -10) if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > 0) { caller->getSubgridCUDA3D(i,caller, &u[l]); Loading
src/functions/tnlTestFunction_impl.h +2 −2 Original line number Diff line number Diff line Loading @@ -76,8 +76,8 @@ configSetup( tnlConfigDescription& config, config.addEntryEnum( "sdf-sin-bumps" ); config.addEntryEnum( "sdf-sin-wave-sdf" ); config.addEntryEnum( "sdf-sin-bumps-sdf" ); config.addEntryEnum( "sdf-paraboloid" ); config.addEntryEnum( "sdf-paraboloid-sdf" ); config.addEntryEnum( "sdf-para" ); config.addEntryEnum( "sdf-para-sdf" ); config.addEntry < double >( prefix + "constant", "Value of the constant function.", 0.0 ); config.addEntry < double >( prefix + "wave-length", "Wave length of the sine based test functions.", 1.0 ); config.addEntry < double >( prefix + "wave-length-x", "Wave length of the sine based test functions.", 1.0 ); Loading
src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h +15 −15 Original line number Diff line number Diff line Loading @@ -358,21 +358,21 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re } if(xb - xf > 0.0) a = xb; else a = xf; if(yb - yf > 0.0) b = yb; else b = yf; if(zb - zf > 0.0) c = zb; else c = zf; // if(xb - xf > 0.0) // a = xb; // else // a = xf; // // if(yb - yf > 0.0) // b = yb; // else // b = yf; // // if(zb - zf > 0.0) // c = zb; // else // c = zf; // // d = ( 1.0 - sqrt(a*a + b*b + c*c)*ihx ); d = 1.0 - sqrt(xf*xf + xb*xb + yf*yf + yb*yb + zf*zf + zb*zb)*ihx; /*upwind*/ Loading