Commit 28491294 authored by Tomas Sobotik

Added reduction

parent a290ac2b
@@ -1058,7 +1058,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
 if(u[0]*u[l] <= 0.0)
     atomicMax( &tmp, 1);
-__syncthreads();
+//__syncthreads();
 //printf("tmp = %d", tmp);
@@ -1074,7 +1074,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
     value=Max(value,fabs(u[l]));
     __syncthreads();
 }
-__syncthreads();
+//__syncthreads();
 //atomicMax(&value,fabs(u[l]))
 if(l == 0)
@@ -1106,7 +1106,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
     if(i > 0)
         u[i*this->n + j] = value;
 }
-__syncthreads();
+//__syncthreads();
 double time = 0.0;
 __shared__ double currentTau;
@@ -1129,17 +1129,17 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
 /****
  * Compute the RHS
  */
-__syncthreads();
+//__syncthreads();
 fu = schemeHost.getValueDev( this->subMesh, l, this->subMesh.getCellCoordinates(l), u, time, boundaryCondition);
-__syncthreads();
+//__syncthreads();
 //printf("%d : %f\n", l, fu);
 //atomicMax(&maxResidue,fabs(fu)); //maxResidue = fu.absMax();
 sharedRes[l]=fabs(fu);
-if(l == 0)
+/* if(l == 0)
     maxResidue = 0.0;
 __syncthreads();
 //start reduction
@@ -1148,49 +1148,69 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
     if(l == o)
         maxResidue=Max(maxResidue,sharedRes[l]);
     __syncthreads();
-}
-__syncthreads();
+}*/
+//__syncthreads();
 //end reduction
-if(l == 0)
-{
-    if( this -> cflCondition * maxResidue != 0.0)
-        currentTau = this -> cflCondition / maxResidue;
-    if(currentTau > 1.0 * this->subMesh.getHx())
-    {
-        currentTau = 1.0 * this->subMesh.getHx();
-    }
-    if( time + currentTau > finalTime ) currentTau = finalTime - time;
-}
-__syncthreads();
+__syncthreads();
+for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1)
+{
+    if( l < s )
+        sharedRes[l] = Max(sharedRes[l],sharedRes[l+s]);
+    __syncthreads();
+}
+if(l==0) maxResidue=sharedRes[l];
+sharedTau[l]=currentTau;
+__syncthreads();
+// if(l == 0)
+if( this -> cflCondition * maxResidue != 0.0)
+    sharedTau[l] = this -> cflCondition / maxResidue;
+if(l == 0)
+    if(sharedTau[l] > 1.0 * this->subMesh.getHx())
+        sharedTau[l] = 1.0 * this->subMesh.getHx();
+if(l == 1)
+    if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time;
+//__syncthreads();
 //
 //double tau2 = finalTime;
-if((u[l]+currentTau * fu)*u[l] < 0.0 && fu != 0.0 && u[l] != 0.0 )
+sharedTau[l]= finalTime;
+if((u[l]+sharedTau[l] * fu)*u[l] < 0.0 && fu != 0.0 && u[l] != 0.0 )
     sharedTau[l] = fabs(u[l]/(2.0*fu));
 //start reduction
+__syncthreads();
 //atomicMin(&currentTau, tau2);
+/* if(l==0) currentTau=sharedTau[l];
+__syncthreads();
 for(int o = 0; o < blockDim.x*blockDim.y; o++)
 {
     if(l == o)
         currentTau=Min(currentTau,sharedTau[l]);
     __syncthreads();
-}
+}*/
 //end reduction
+__syncthreads();
+for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1)
+{
+    if( l < s )
+        sharedTau[l] = Min(sharedTau[l],sharedTau[l+s]);
+    __syncthreads();
+}
+if(l==0) currentTau=sharedTau[l];
+__syncthreads();
 //
-__syncthreads();
+//__syncthreads();
 u[l] += currentTau * fu;
 //if(l==0)
 //    printf("ct %f\n",currentTau);
@@ -1535,7 +1555,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(1,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1544,7 +1564,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(2,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1553,7 +1573,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(4,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1562,7 +1582,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(8,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1574,7 +1594,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(3,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1583,7 +1603,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(5,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1592,7 +1612,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(10,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
@@ -1601,15 +1621,17 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
     caller->runSubgridCUDA(12,u,i);
     __syncthreads();
     caller->insertSubgridCUDA(u[l],i);
-    __syncthreads();
+    //__syncthreads();
     caller->getSubgridCUDA(i,caller, &u[l]);
     __syncthreads();
 }
-caller->setBoundaryConditionCUDA(i, 0);
-caller->setSubgridValueCUDA(i, caller->getSubgridValueCUDA(i) - 1 );
+if(l==0)
+{
+    caller->setBoundaryConditionCUDA(i, 0);
+    caller->setSubgridValueCUDA(i, caller->getSubgridValueCUDA(i) - 1 );
+}
 }
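This last hunk also moves the per-subgrid bookkeeping calls (setBoundaryConditionCUDA, setSubgridValueCUDA) under an if(l==0) guard, so each block writes its flags exactly once instead of once per thread. A minimal sketch of that single-writer idiom follows; the kernel and variable names are assumptions for illustration, not the TNL API.

    #include <cstdio>

    // Hedged illustration: one block per subgrid, and only thread 0 of the block
    // touches the per-subgrid flags.
    __global__ void updateSubgridFlags(int* boundaryCondition, int* subgridValue)
    {
        const int i = blockIdx.x;
        if (threadIdx.x == 0)        // single writer: the flags are per-subgrid, not per-thread
        {
            boundaryCondition[i] = 0;
            subgridValue[i] -= 1;
        }
        __syncthreads();             // block-wide barrier before anyone reads the flags again
    }

    int main()
    {
        int *bc, *sv, h_bc = 7, h_sv = 3;
        cudaMalloc(&bc, sizeof(int));
        cudaMalloc(&sv, sizeof(int));
        cudaMemcpy(bc, &h_bc, sizeof(int), cudaMemcpyHostToDevice);
        cudaMemcpy(sv, &h_sv, sizeof(int), cudaMemcpyHostToDevice);
        updateSubgridFlags<<<1, 32>>>(bc, sv);
        cudaMemcpy(&h_bc, bc, sizeof(int), cudaMemcpyDeviceToHost);
        cudaMemcpy(&h_sv, sv, sizeof(int), cudaMemcpyDeviceToHost);
        printf("boundaryCondition = %d, subgridValue = %d\n", h_bc, h_sv);  // expect 0, 2
        cudaFree(bc); cudaFree(sv);
        return 0;
    }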