Commit d40a55e3 authored by Matouš Fencl's avatar Matouš Fencl Committed by Tomáš Oberhuber
Browse files

FIM method implemented for GPU and FIM-FSM implemented for CPU (parallel).

parent 08ec37be
Loading
Loading
Loading
Loading
+10 −8
Original line number Diff line number Diff line
@@ -74,12 +74,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
                                         const MeshEntity& cell,
                                         const RealType velocity = 1.0 );
      
      __cuda_callable__ bool updateCell( volatile Real sArray[18][18],
      template< int sizeSArray >
      __cuda_callable__ bool updateCell( volatile Real *sArray,
                                         int thri, int thrj, const Real hx, const Real hy,
                                         const Real velocity = 1.0 );
      
      template< int sizeSArray >
      void updateBlocks( InterfaceMapType interfaceMap,
                         MeshFunctionType aux,
                         ArrayContainer BlockIterHost, int numThreadsPerBlock );
                         MeshFunctionType helpFunc,
                         ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ );
      
      void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY  );
};
@@ -119,9 +123,6 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double
template < typename T1 >
__cuda_callable__ void sortMinims( T1 pom[] );

template < typename Index >
void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY );

#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, 
@@ -134,11 +135,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
                                      Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
                                      bool *BlockIterDevice );

template < typename Real, typename Device, typename Index >
template < int sizeSArray, typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
                                      TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 );
                                      const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc,
                                      TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0);

template < typename Index >
__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
+1036 −1009
Original line number Diff line number Diff line
@@ -92,12 +92,15 @@ initInterface( const MeshFunctionPointer& _input,
template< typename Real,
        typename Device,
        typename Index >
template< int sizeSArray >
void
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
updateBlocks( InterfaceMapType interfaceMap,
        MeshFunctionType aux,
              ArrayContainer BlockIterHost, int numThreadsPerBlock )
        MeshFunctionType helpFunc,
        ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
{
#pragma omp parallel for schedule( dynamic )
  for( int i = 0; i < BlockIterHost.getSize(); i++ )
  {
    if( BlockIterHost[ i ] )
@@ -105,19 +108,23 @@ updateBlocks( InterfaceMapType interfaceMap,
      MeshType mesh = interfaceMap.template getMesh< Devices::Host >();
      
      int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y();
      int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0);
      int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0);
      //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl;
      int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0);
      int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0);
      //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl;
      int xkolik = numThreadsPerBlock + 1;
      int ykolik = numThreadsPerBlock + 1;
      
      int blIdx = i%numOfBlockx;
      int blIdy = i/numOfBlocky;
      int blIdy = i/numOfBlockx;
      //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl;
      
      if( numOfBlockx - 1 == blIdx )
        xkolik = dimX - (blIdx)*numThreadsPerBlock+1;
      
      if( numOfBlocky -1 == blIdy )
        ykolik = dimY - (blIdy)*numThreadsPerBlock+1;
      //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl;
      
      
      /*bool changed[numThreadsPerBlock*numThreadsPerBlock];
@@ -125,114 +132,133 @@ updateBlocks( InterfaceMapType interfaceMap,
      Real hx = mesh.getSpaceSteps().x();
      Real hy = mesh.getSpaceSteps().y();
      
      Real changed1[ 16*16 ];
      /*Real changed2[ 16*16 ];
      Real changed3[ 16*16 ];
      Real changed4[ 16*16 ];*/
      Real sArray[18][18];
      bool changed = false;
      
      for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ )
        for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ )
          sArray[thrj][thri] = std::numeric_limits< Real >::max();
      
      RealType *sArray;
      sArray = new Real[ sizeSArray * sizeSArray ];
      if( sArray == nullptr )
        std::cout << "Error while allocating memory for sArray." << std::endl;
      
      for( int thri = 0; thri < sizeSArray; thri++ ){
        for( int thrj = 0; thrj < sizeSArray; thrj++ )
          sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max();
      }
      
      BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0;
      
      for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ )
      {        
        if( dimX > (blIdx+1) * numThreadsPerBlock  && thrj+1 < ykolik )
          sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ];
        else
         sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max();
          sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ];
        
        
        if( blIdx != 0 && thrj+1 < ykolik )
          sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ];
        else
          sArray[thrj+1][0] = std::numeric_limits< Real >::max();
          sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ];
        
        if( dimY > (blIdy+1) * numThreadsPerBlock  && thrj+1 < xkolik )
          sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ];
        else
          sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max();
          sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ];
        
        if( blIdy != 0 && thrj+1 < xkolik )
          sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ];
        else
          sArray[0][thrj+1] = std::numeric_limits< Real >::max();
          sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ];
      }
      
      for( int k = 0; k < numThreadsPerBlock; k++ )
      for( int k = 0; k < numThreadsPerBlock; k++ ){
        for( int l = 0; l < numThreadsPerBlock; l++ )
          sArray[k+1][l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ];

      for( int k = 0; k < numThreadsPerBlock; k++ ) 
          if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX )
            sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ];
      }
      bool pom = false;
      for( int k = 0; k < numThreadsPerBlock; k++ ){ 
        for( int l = 0; l < numThreadsPerBlock; l++ ){
          changed1[ k*numThreadsPerBlock + l ] = 0;
          /*changed2[ k*numThreadsPerBlock + l ] = 0;
          changed3[ k*numThreadsPerBlock + l ] = 0;
          changed4[ k*numThreadsPerBlock + l ] = 0;*/
          if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l < dimX*dimY )
          {
          if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){
            //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl;
            if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ] )
            {
              changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy);
              pom = this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy);
              changed = changed || pom;
            }
          }
        }
      }
      /*aux.save( "aux-1pruch.tnl" );
      for( int k = 0; k < sizeSArray; k++ ){ 
        for( int l = 0; l < sizeSArray; l++ ) {
          std::cout << sArray[ k * sizeSArray + l] << " ";
        }
        std::cout << std::endl;
      }*/
           
      for( int k = numThreadsPerBlock-1; k > -1; k-- ) 
        for( int l = 0; l < numThreadsPerBlock; l++ ) { 
          if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l < dimX*dimY )
      for( int k = 0; k < numThreadsPerBlock; k++ ) 
        for( int l = numThreadsPerBlock-1; l >-1; l-- ) { 
          if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX )
          {
            if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ] )
            {
              /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy);
              this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy);
            }
          }
        }
      /*aux.save( "aux-2pruch.tnl" );
      for( int k = 0; k < sizeSArray; k++ ){ 
        for( int l = 0; l < sizeSArray; l++ ) {
          std::cout << sArray[ k * sizeSArray + l] << " ";
        }
        std::cout << std::endl;
      }*/
      
      for( int k = 0; k < numThreadsPerBlock; k++ ) 
        for( int l = numThreadsPerBlock-1; l >-1; l-- ) { 
          if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l < dimX*dimY )
      for( int k = numThreadsPerBlock-1; k > -1; k-- ) 
        for( int l = 0; l < numThreadsPerBlock; l++ ) {
          if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX )
          {
            if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ] )
            {
              /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy);
              this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy);
            }
          }
        }
      /*aux.save( "aux-3pruch.tnl" );
      for( int k = 0; k < sizeSArray; k++ ){ 
        for( int l = 0; l < sizeSArray; l++ ) {
          std::cout << sArray[ k * sizeSArray + l] << " ";
        }
        std::cout << std::endl;
      }*/
      
      for( int k = numThreadsPerBlock-1; k > -1; k-- ) 
      for( int k = numThreadsPerBlock-1; k > -1; k-- ){
        for( int l = numThreadsPerBlock-1; l >-1; l-- ) { 
          if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l < dimX*dimY )
          if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX )
          {
            if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ] )
            {
              /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy);
              this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy);
            }
          }
        }

      for( int k = numThreadsPerBlock-1; k > -1; k-- ) 
        for( int l = numThreadsPerBlock-1; l >-1; l-- ){
          changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ];
          /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ];
          changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ];
          changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/
      }
      /*aux.save( "aux-4pruch.tnl" );
      for( int k = 0; k < sizeSArray; k++ ){ 
        for( int l = 0; l < sizeSArray; l++ ) {
          std::cout << sArray[ k * sizeSArray + l] << " ";
        }
        std::cout << std::endl;
      }*/
      
      if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ )
      
      if( changed ){
        BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1;
      }
      
      
      for( int k = 0; k < numThreadsPerBlock; k++ ){ 
        for( int l = 0; l < numThreadsPerBlock; l++ ) {
          if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l < dimX*dimY &&
              (!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ]) )
            aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ];
          if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX )      
            helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx  + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ];
          //std::cout<< sArray[k+1][l+1];
        }
        //std::cout<<std::endl;
      }
      //delete []sArray;
    }
  }
}
@@ -249,27 +275,27 @@ getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY )
  
  for(int i = 0; i < numBlockX * numBlockY; i++)
  {
    BlockIterPom[ i ] = 0;  
    if( BlockIterHost[ i ] )
    {
      // i = k*numBlockY + m;
    BlockIterPom[ i ] = 0;//BlockIterPom[ i ] = 0;
    int m=0, k=0;
    m = i%numBlockX;
    k = i/numBlockX;
      if( k > 0 )
        BlockIterPom[i - numBlockX] = 1;
      if( k < numBlockY - 1 )
        BlockIterPom[i + numBlockX] = 1;
      
      if( m < numBlockX - 1 )
        BlockIterPom[ i+1 ] = 1;
      if( m > 0 )
        BlockIterPom[ i-1 ] = 1;
    if( m > 0 && BlockIterHost[ i - 1 ] ){
      BlockIterPom[ i ] = 1;
    }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){
      BlockIterPom[ i ] = 1;
    }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){
      BlockIterPom[ i ] = 1;
    }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){
      BlockIterPom[ i ] = 1;
    }
    //BlockIterPom[ i ];
  }
  
  for(int i = 0; i < numBlockX * numBlockY; i++)
      //if( !BlockIter[ i ] )
  {
    if( !BlockIterHost[ i ] )
      BlockIterHost[ i ] = BlockIterPom[ i ];
  }
  /*else
   BlockIter[ i ] = 0;*/
  /*for( int i = numBlockX-1; i > -1; i-- )
@@ -353,11 +379,11 @@ initInterface( const MeshFunctionPointer& _input,
     
     for (int i = 0; i < 320; i++)
     for (int j = 0; j < 320; j++)
                fileInit >> A[i][j];
     fileInit >> A[j];
     fileInit.close();
     for (int i = 0; i < 320; i++)
     for (int j = 0; j < 320; j++)
                input[i*320 + j] = A[i][j];*/
     input[i*320 + j] = A[j];*/
    
    
    MeshFunctionType& output = _output.modifyData();
@@ -1110,20 +1136,21 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3
template< typename Real,
        typename Device,
        typename Index >
template< int sizeSArray >
__cuda_callable__
bool
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy,
updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy,
        const Real v )
{
   const RealType value = sArray[ thrj ][ thri ];
  const RealType value = sArray[ thrj * sizeSArray + thri ];
  RealType a, b, tmp = std::numeric_limits< RealType >::max();
  
   b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ],
                        sArray[ thrj-1 ][ thri ] );
  b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ],
          sArray[ (thrj-1) * sizeSArray + thri ] );
  
   a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ],
                        sArray[ thrj ][ thri-1 ] );
  a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ],
          sArray[ thrj * sizeSArray + thri-1 ] );
  
  if( fabs( a ) == std::numeric_limits< RealType >::max() && 
          fabs( b ) == std::numeric_limits< RealType >::max() )
@@ -1136,8 +1163,8 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
  
  if( fabs( tmp ) < fabs( pom[ 1 ] ) ) 
  {
        sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrj ][ thri ];
    sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp );
    tmp = value - sArray[ thrj * sizeSArray + thri ];
    if ( fabs( tmp ) >  0.001*hx )
      return true;
    else
@@ -1148,8 +1175,8 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
    tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + 
            TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] +  pom[ 4 ] *  pom[ 4 ] )/( v * v ) - 
            ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
        sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrj ][ thri ];
    sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp );
    tmp = value - sArray[ thrj * sizeSArray + thri ];
    if ( fabs( tmp ) > 0.001*hx )
      return true;
    else
+394 −226

File changed.

Preview size limit exceeded, changes collapsed.