diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h index 7dfc31f6923fe059b6b905fc3286dcf444ae29d5..7eaa71277b5787d615d1f0c17ca2ccb966447e1d 100644 --- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h +++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h @@ -1257,25 +1257,25 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: (threadIdx.x == blockDim.x - 1 && blockIdx.x != gridDim.x - 1 && !(cudaSolver->currentStep & 1)) || (threadIdx.y == blockDim.y - 1 && blockIdx.y != gridDim.y - 1 && (cudaSolver->currentStep & 1)) ) { - if(threadIdx.x == 0 && !(cudaSolver->currentStep & 1)) + if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1); boundary_index = 2; } - if(threadIdx.y == 0 && (cudaSolver->currentStep & 1)) + if(threadIdx.y == 0 /*&& (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); boundary_index = 3; } - if(threadIdx.x == blockDim.x - 1 && !(cudaSolver->currentStep & 1)) + if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1); boundary_index = 1; } - if(threadIdx.y == blockDim.y - 1 && (cudaSolver->currentStep & 1)) + if(threadIdx.y == blockDim.y - 1 /*&& (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); @@ -1290,6 +1290,27 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure } + __threadfence(); + if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/) + { + u_cmp = cudaSolver->work_u_cuda[gid + 1]; + subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1); + boundary_index = 1; + } + if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/) + { + u_cmp = cudaSolver->work_u_cuda[gid - 1]; + subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1); + boundary_index = 2; + } + __threadfence(); + if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) + { + cudaSolver->unusedCell_cuda[gid] = 0; + atomicMax(&newSubgridValue, INT_MAX); + atomicMax(&boundary[boundary_index], 1); + cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure + } } __syncthreads();