Loading examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h +32 −30 Original line number Diff line number Diff line Loading @@ -1234,8 +1234,8 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; double u = cudaSolver->work_u_cuda[gid]; double u_cmp; int subgridValue_cmp; int boundary_index; int subgridValue_cmp=INT_MAX; int boundary_index=0; if(threadIdx.x+threadIdx.y == 0) Loading @@ -1252,35 +1252,36 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: if( (threadIdx.x == 0 && blockIdx.x != 0 && !(cudaSolver->currentStep & 1)) || (threadIdx.y == 0 && blockIdx.y != 0 && (cudaSolver->currentStep & 1)) || (threadIdx.x == blockDim.x - 1 && blockIdx.x != gridDim.x - 1 && !(cudaSolver->currentStep & 1)) || (threadIdx.y == blockDim.y - 1 && blockIdx.y != gridDim.y - 1 && (cudaSolver->currentStep & 1)) ) if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) { if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/) if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1); boundary_index = 2; } if(threadIdx.y == 0 /*&& (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); boundary_index = 3; } if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/) if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1); boundary_index = 1; } if(threadIdx.y == blockDim.y - 1 /*&& (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); boundary_index = 0; } // if(threadIdx.y == 0 && (blockIdx.y != 0) && (cudaSolver->currentStep & 1)) // { // u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; // subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); // boundary_index = 3; // } // if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1) && (cudaSolver->currentStep & 1)) // { // u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; // subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); // boundary_index = 0; // } __threadfence(); if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) Loading @@ -1288,28 +1289,29 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: cudaSolver->unusedCell_cuda[gid] = 0; atomicMax(&newSubgridValue, INT_MAX); atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure cudaSolver->work_u_cuda[gid] = u_cmp; } __threadfence(); if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/) if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1); boundary_index = 1; u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); boundary_index = 3; } if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/) if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1); boundary_index = 2; u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); boundary_index = 0; } __threadfence(); if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) { cudaSolver->unusedCell_cuda[gid] = 0; atomicMax(&newSubgridValue, INT_MAX); atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure cudaSolver->work_u_cuda[gid] = u_cmp; } } __syncthreads(); Loading Loading
examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h +32 −30 Original line number Diff line number Diff line Loading @@ -1234,8 +1234,8 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; double u = cudaSolver->work_u_cuda[gid]; double u_cmp; int subgridValue_cmp; int boundary_index; int subgridValue_cmp=INT_MAX; int boundary_index=0; if(threadIdx.x+threadIdx.y == 0) Loading @@ -1252,35 +1252,36 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: if( (threadIdx.x == 0 && blockIdx.x != 0 && !(cudaSolver->currentStep & 1)) || (threadIdx.y == 0 && blockIdx.y != 0 && (cudaSolver->currentStep & 1)) || (threadIdx.x == blockDim.x - 1 && blockIdx.x != gridDim.x - 1 && !(cudaSolver->currentStep & 1)) || (threadIdx.y == blockDim.y - 1 && blockIdx.y != gridDim.y - 1 && (cudaSolver->currentStep & 1)) ) if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) { if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/) if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1); boundary_index = 2; } if(threadIdx.y == 0 /*&& (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); boundary_index = 3; } if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/) if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1); boundary_index = 1; } if(threadIdx.y == blockDim.y - 1 /*&& (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); boundary_index = 0; } // if(threadIdx.y == 0 && (blockIdx.y != 0) && (cudaSolver->currentStep & 1)) // { // u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; // subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); // boundary_index = 3; // } // if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1) && (cudaSolver->currentStep & 1)) // { // u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; // subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); // boundary_index = 0; // } __threadfence(); if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) Loading @@ -1288,28 +1289,29 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>:: cudaSolver->unusedCell_cuda[gid] = 0; atomicMax(&newSubgridValue, INT_MAX); atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure cudaSolver->work_u_cuda[gid] = u_cmp; } __threadfence(); if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/) if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1); boundary_index = 1; u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x); boundary_index = 3; } if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/) if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - 1]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1); boundary_index = 2; u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x); boundary_index = 0; } __threadfence(); if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) { cudaSolver->unusedCell_cuda[gid] = 0; atomicMax(&newSubgridValue, INT_MAX); atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure cudaSolver->work_u_cuda[gid] = u_cmp; } } __syncthreads(); Loading