Skip to content
Snippets Groups Projects
Commit bb0933c7 authored by Tomas Sobotik's avatar Tomas Sobotik
Browse files

More optimalizations

parent 24754ec9
No related branches found
No related tags found
No related merge requests found
......@@ -1234,8 +1234,8 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
double u = cudaSolver->work_u_cuda[gid];
double u_cmp;
int subgridValue_cmp;
int boundary_index;
int subgridValue_cmp=INT_MAX;
int boundary_index=0;
if(threadIdx.x+threadIdx.y == 0)
......@@ -1252,35 +1252,36 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
if( (threadIdx.x == 0 && blockIdx.x != 0 && !(cudaSolver->currentStep & 1)) ||
(threadIdx.y == 0 && blockIdx.y != 0 && (cudaSolver->currentStep & 1)) ||
(threadIdx.x == blockDim.x - 1 && blockIdx.x != gridDim.x - 1 && !(cudaSolver->currentStep & 1)) ||
(threadIdx.y == blockDim.y - 1 && blockIdx.y != gridDim.y - 1 && (cudaSolver->currentStep & 1)) )
if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) ||
(threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) ||
(threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) ||
(threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) )
{
if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/)
if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/)
{
u_cmp = cudaSolver->work_u_cuda[gid - 1];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1);
boundary_index = 2;
}
if(threadIdx.y == 0 /*&& (cudaSolver->currentStep & 1)*/)
{
u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x);
boundary_index = 3;
}
if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/)
if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/)
{
u_cmp = cudaSolver->work_u_cuda[gid + 1];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1);
boundary_index = 1;
}
if(threadIdx.y == blockDim.y - 1 /*&& (cudaSolver->currentStep & 1)*/)
{
u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x);
boundary_index = 0;
}
// if(threadIdx.y == 0 && (blockIdx.y != 0) && (cudaSolver->currentStep & 1))
// {
// u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
// subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x);
// boundary_index = 3;
// }
// if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1) && (cudaSolver->currentStep & 1))
// {
// u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
// subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x);
// boundary_index = 0;
// }
__threadfence();
if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
......@@ -1288,28 +1289,29 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
cudaSolver->unusedCell_cuda[gid] = 0;
atomicMax(&newSubgridValue, INT_MAX);
atomicMax(&boundary[boundary_index], 1);
cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure
cudaSolver->work_u_cuda[gid] = u_cmp;
}
__threadfence();
if(threadIdx.x == blockDim.x - 1/* && !(cudaSolver->currentStep & 1)*/)
if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/)
{
u_cmp = cudaSolver->work_u_cuda[gid + 1];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x + 1);
boundary_index = 1;
u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y - 1)*gridDim.x + blockIdx.x);
boundary_index = 3;
}
if(threadIdx.x == 0 /*&& !(cudaSolver->currentStep & 1)*/)
if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/)
{
u_cmp = cudaSolver->work_u_cuda[gid - 1];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA(blockIdx.y*gridDim.x + blockIdx.x - 1);
boundary_index = 2;
u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
subgridValue_cmp = cudaSolver->getSubgridValueCUDA((blockIdx.y + 1)*gridDim.x + blockIdx.x);
boundary_index = 0;
}
__threadfence();
if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
{
cudaSolver->unusedCell_cuda[gid] = 0;
atomicMax(&newSubgridValue, INT_MAX);
atomicMax(&boundary[boundary_index], 1);
cudaSolver->work_u_cuda[gid] = u_cmp; ////// unsure
cudaSolver->work_u_cuda[gid] = u_cmp;
}
}
__syncthreads();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment