Commit 28491294 authored by Tomas Sobotik's avatar Tomas Sobotik
Browse files

Added reduction

parent a290ac2b
Loading
Loading
Loading
Loading
+59 −37
Original line number Diff line number Diff line
@@ -1058,7 +1058,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru

	if(u[0]*u[l] <= 0.0)
		atomicMax( &tmp, 1);
	__syncthreads();
	//__syncthreads();
	//printf("tmp = %d", tmp);


@@ -1074,7 +1074,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
			value=Max(value,fabs(u[l]));
		__syncthreads();
	}
	__syncthreads();
	//__syncthreads();
	//atomicMax(&value,fabs(u[l]))

	if(l == 0)
@@ -1106,7 +1106,7 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
		if(i > 0)
			u[i*this->n + j] = value;
	}
	__syncthreads();
	//__syncthreads();

   double time = 0.0;
   __shared__ double currentTau;
@@ -1129,17 +1129,17 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
      /****
       * Compute the RHS
       */
	   __syncthreads();
	   //__syncthreads();


	   fu = schemeHost.getValueDev( this->subMesh, l, this->subMesh.getCellCoordinates(l), u, time, boundaryCondition);
      __syncthreads();
	   //__syncthreads();
      //printf("%d : %f\n", l, fu);


      //atomicMax(&maxResidue,fabs(fu));//maxResidue = fu. absMax();
      sharedRes[l]=fabs(fu);
    if(l == 0)
  /*  if(l == 0)
    	maxResidue = 0.0;
    __syncthreads();
    																												//start reduction
@@ -1148,49 +1148,69 @@ void tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::ru
  		if(l == o)
  			maxResidue=Max(maxResidue,sharedRes[l]);
  		__syncthreads();
  	}
  	__syncthreads();
  	}*/
  	//__syncthreads();
  																													//end reduction
    __syncthreads();
	for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1)
	{
		if( l < s )
			sharedRes[l] = Max(sharedRes[l],sharedRes[l+s]) ;
		__syncthreads();
	}
	if(l==0) maxResidue=sharedRes[l];



    sharedTau[l]=currentTau;
    __syncthreads();
     // if(l == 0)
    	  if( this -> cflCondition * maxResidue != 0.0)
    		  sharedTau[l] =  this -> cflCondition / maxResidue;

      if(l == 0)
      {
    	  if( this -> cflCondition * maxResidue != 0.0)
    		  currentTau =  this -> cflCondition / maxResidue;
    	  if(sharedTau[l] > 1.0 * this->subMesh.getHx())
    		  sharedTau[l] = 1.0 * this->subMesh.getHx();

    	  if(currentTau > 1.0 * this->subMesh.getHx())
    	  {
    		  currentTau = 1.0 * this->subMesh.getHx();
    	  }
      if(l == 1)
    	  if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time;

    	  if( time + currentTau > finalTime ) currentTau = finalTime - time;
      }
      __syncthreads();
      //__syncthreads();
 //

      //double tau2 = finalTime;
      sharedTau[l]= finalTime;
      if((u[l]+currentTau * fu)*u[l] < 0.0 && fu != 0.0 && u[l] != 0.0 )

      if((u[l]+sharedTau[l] * fu)*u[l] < 0.0 && fu != 0.0 && u[l] != 0.0 )
    	  sharedTau[l] = fabs(u[l]/(2.0*fu));

      	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  //start reduction
      __syncthreads();

      //atomicMin(&currentTau, tau2);
     /* if(l==0) currentTau=sharedTau[l];
      __syncthreads();
    	for(int o = 0; o < blockDim.x*blockDim.y; o++)
    	{
    		if(l == o)
    			currentTau=Min(currentTau,sharedTau[l]);
    		__syncthreads();
    	}
    	}*/
    																											//end reduction



    __syncthreads();
	for(unsigned int s = blockDim.x*blockDim.y/2; s>0; s>>=1)
	{
		if( l < s )
			sharedTau[l] = Min(sharedTau[l],sharedTau[l+s]) ;
		__syncthreads();
	}
	if(l==0) currentTau=sharedTau[l];
	__syncthreads();


//


      __syncthreads();
      //__syncthreads();
      u[l] += currentTau * fu;
      //if(l==0)
    	 // printf("ct %f\n",currentTau);
@@ -1535,7 +1555,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(1,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1544,7 +1564,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(2,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1553,7 +1573,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(4,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1562,7 +1582,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(8,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1574,7 +1594,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(3,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1583,7 +1603,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(5,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1592,7 +1612,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(10,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}
@@ -1601,15 +1621,17 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
			caller->runSubgridCUDA(12,u,i);
			__syncthreads();
			caller->insertSubgridCUDA(u[l],i);
			__syncthreads();
			//__syncthreads();
			caller->getSubgridCUDA(i,caller, &u[l]);
			__syncthreads();
		}


		if(l==0)
		{
			caller->setBoundaryConditionCUDA(i, 0);
			caller->setSubgridValueCUDA(i, caller->getSubgridValueCUDA(i) - 1 );

		}

	}