Commit 204f7f1d authored by Matouš Fencl's avatar Matouš Fencl Committed by Tomáš Oberhuber
Browse files

2D and 3D solvers extended with MPI (3D has issue on biggest mesh)

parent df7abcac
Loading
Loading
Loading
Loading
+76 −51
Original line number Diff line number Diff line
#!/bin/bash

device="host"
dimensions="2D 3D"
#dimensions="3D"
#dimensions="2D 3D"
dimensions="2D"
sizes1D="16 32 64 128 256 512 1024 2048 4096"
#sizes1D="256"
sizes2D="16 32 64 128 256 512 1024"
#sizes2D="8"
sizes3D="16 32 64 128 256"
sizes2D="16 32 64 128 256 512 1024 2048 4096"
#sizes2D="16"
sizes3D="8 16 32 64 128 256"
#sizes3D="128 256"
testFunctions="paraboloid"
#testFunctions="sin-wave-sdf"
#testFunctions="sin-bumps-sdf"
snapshotPeriod=0.1
finalTime=1.5
solverName="tnl-direct-eikonal-solver"
#solverName="gdb --args tnl-direct-eikonal-solver-dbg --catch-exceptions no"
#
realType="double"
#mpiRun="mpirun -np 4 -oversubscribe "
mpiRun=""

## CAREFULL: If you set LocalCopy of MPI, you have to start with mpiRun even tnl-init
## 	     This isnt problem with MpiIO.
## CAREFULL: For smoothly calculated error, you have to choose the right output function which
##           is for both MpiIO, LocalCopy different.

solverName=${mpiRun}"tnl-direct-eikonal-solver"
#solverName="gdb --args "${mpiRun}"tnl-direct-eikonal-solver-dbg --catch-exceptions no --mpi-gdb-debug false"
#scale=2.0
#finalSdf="aux-0.tnl aux-1.tnl"
finalSdf="aux-final.tnl"

setupTestFunction()
{
@@ -22,16 +36,17 @@ setupTestFunction()
#   then
      origin=-1.0
      proportions=2.0
#      origin=-1.0
#      proportions=2.0
      amplitude=1.0
      waveLength=0.2
      waveLengthX=0.2
      waveLengthY=0.2
      waveLengthZ=0.2
      wavesNumber=3.0
      wavesNumberX=0.5
      wavesNumberY=2.0
      waveLength=0.4
      waveLengthX=0.5
      waveLengthY=0.5
      waveLengthZ=0.5
      wavesNumber=1.25
      wavesNumberX=0.5      wavesNumberY=2.0
      wavesNumberZ=3.0
      phase=0.1
      phase=-1.5
      phaseX=0.0
      phaseY=0.0
      phaseZ=0.0
@@ -44,6 +59,7 @@ setupGrid()
{
   dimensions=$1
   gridSize=$2
   #scale=$3
   tnl-grid-setup --dimensions ${dimensions} \
                  --origin-x ${origin} \
                  --origin-y ${origin} \
@@ -53,13 +69,16 @@ setupGrid()
                  --proportions-z ${proportions} \
                  --size-x ${gridSize} \
                  --size-y ${gridSize} \
	    	  --real-type ${realType} \
                  --size-z ${gridSize}
#$((2*${gridSize})) 
}

setInitialCondition()
{
   testFunction=$1
   tnl-init --test-function ${testFunction} \
	    		 --real-type ${realType} \
            		 --output-file initial-u.tnl \
            		 --amplitude ${amplitude} \
            		 --wave-length ${waveLength} \
@@ -78,6 +97,7 @@ setInitialCondition()
            		 --radius ${radius}
   
   tnl-init --test-function ${testFunction}-sdf \
	    		 --real-type ${realType} \
            		 --output-file exact-u.tnl \
            		 --amplitude ${amplitude} \
            		 --wave-length ${waveLength} \
@@ -89,11 +109,11 @@ setInitialCondition()
            		 --waves-number-y ${wavesNumberY} \
            		 --waves-number-z ${wavesNumberZ} \
            		 --phase ${phase} \
            --phase-x ${phaseX} \
            --phase-y ${phaseY} \
            		 --phase-x ${phaseZ} \
            		 --phase-y ${phaseZ} \
            		 --phase-z ${phaseZ} \
            		 --sigma ${sigma} \
            --radius ${radius}
            		 --radius ${radius} \

}

@@ -111,17 +131,22 @@ solve()
                 --min-iterations 20 \
                 --convergence-residue 1.0e-12 \
                 --snapshot-period ${snapshotPeriod} \
		 --real-type ${realType} \
                 --final-time ${finalTime}
}
               
computeError()
{
for sweep in ${finalSdf}
do
   tnl-diff --mesh mesh.tnl \
            --input-files aux-final.tnl exact-u.tnl \
	    --input-files exact-u.tnl u-00000.tnl \
            --mode sequence \
            --snapshot-period ${snapshotPeriod} \
            --output-file errors.txt \
            --write-difference yes
#aux-final.tnl \
done
}

runTest()
+14 −4
Original line number Diff line number Diff line
@@ -62,12 +62,15 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
    typedef Functions::MeshFunction< MeshType > MeshFunctionType;
    typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType;
    typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer;
    typedef Containers::StaticVector< 2, Index > StaticVector;
    using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
    using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;
    
    
    void initInterface( const MeshFunctionPointer& input,
            MeshFunctionPointer& output,
            InterfaceMapPointer& interfaceMap );
            InterfaceMapPointer& interfaceMap,
            StaticVector vLower, StaticVector vUpper );
    
    template< typename MeshEntity >
    __cuda_callable__ bool updateCell( MeshFunctionType& u,
@@ -101,15 +104,17 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
    typedef Functions::MeshFunction< MeshType > MeshFunctionType;
    typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType;
    typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer;
    typedef Containers::StaticVector< 3, Index > StaticVector;
    using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
    using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;      
    
    void initInterface( const MeshFunctionPointer& input,
            MeshFunctionPointer& output,
            InterfaceMapPointer& interfaceMap );
            InterfaceMapPointer& interfaceMap,
            StaticVector vLower, StaticVector vUpper );
    
    template< typename MeshEntity >
    __cuda_callable__ void updateCell( MeshFunctionType& u,
    __cuda_callable__ bool updateCell( MeshFunctionType& u,
            const MeshEntity& cell,
            const RealType velocity = 1.0);
    
@@ -154,6 +159,10 @@ template< typename Real, typename Device, typename Index >
__global__ void DeepCopy( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
        Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, int copy, int k );

template< typename Real, typename Device, typename Index >
__global__ void DeepCopy( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, int copy, int k );

template < typename Index >
__global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks );
@@ -171,7 +180,8 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, 
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output,
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap );
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
        Containers::StaticVector< 3, Index > vLower, Containers::StaticVector< 3, Index > vUpper );

template < int sizeSArray, typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr,
+113 −91
Original line number Diff line number Diff line
@@ -715,17 +715,14 @@ void
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
initInterface( const MeshFunctionPointer& _input,
        MeshFunctionPointer& _output,
        InterfaceMapPointer& _interfaceMap )
        InterfaceMapPointer& _interfaceMap, 
        StaticVector vLower, StaticVector vUpper )
{
  
  if( std::is_same< Device, Devices::Cuda >::value )
  {
#ifdef HAVE_CUDA
    const MeshType& mesh = _input->getMesh();
    Meshes::DistributedMeshes::DistributedMesh< MeshType >* meshPom = mesh.getDistributedMesh();
    
    Containers::StaticVector< 2, Index > vLower = meshPom->getLowerOverlap();
    Containers::StaticVector< 2, Index > vUpper = meshPom->getUpperOverlap();
    
    const int cudaBlockSize( 16 );
    int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
@@ -748,7 +745,7 @@ initInterface( const MeshFunctionPointer& _input,
    InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
    const MeshType& mesh = input.getMesh();
/*#ifdef HAVE_MPI
    int i = Communicators::MpiCommunicator::GetRank( Communicators::MpiCommunicator::AllGroup );
    int i>s::>::GetRan>s::>::AllGroup );
    if( i == 0 )
    {
      printf( "0: mesh x: %d\n", mesh.getDimensions().x() );
@@ -778,11 +775,11 @@ initInterface( const MeshFunctionPointer& _input,
    
    const RealType& hx = mesh.getSpaceSteps().x();
    const RealType& hy = mesh.getSpaceSteps().y();     
    for( cell.getCoordinates().y() = 0;
            cell.getCoordinates().y() < mesh.getDimensions().y();
    for( cell.getCoordinates().y() = 0 + vLower[1];
            cell.getCoordinates().y() < mesh.getDimensions().y() - vUpper[1];
            cell.getCoordinates().y() ++ )
      for( cell.getCoordinates().x() = 0;
              cell.getCoordinates().x() < mesh.getDimensions().x();
      for( cell.getCoordinates().x() = 0 + vLower[0];
              cell.getCoordinates().x() < mesh.getDimensions().x() - vUpper[0];
              cell.getCoordinates().x() ++ )
      {
        cell.refresh();
@@ -856,7 +853,7 @@ initInterface( const MeshFunctionPointer& _input,
        }
      }
#ifdef HAVE_MPI
    //int i = Communicators::MpiCommunicator::GetRank( Communicators::MpiCommunicator::AllGroup );
    //int i>s::>::GetRan>s::>::AllGroup );
    /*if( i == 0 )
    {
      printf( "0: mesh x: %d\n", mesh.getDimensions().x() );
@@ -951,7 +948,8 @@ void
tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >::
initInterface( const MeshFunctionPointer& _input,
        MeshFunctionPointer& _output,
        InterfaceMapPointer& _interfaceMap  )
        InterfaceMapPointer& _interfaceMap, 
        StaticVector vLower, StaticVector vUpper )
{
  if( std::is_same< Device, Devices::Cuda >::value )
  {
@@ -969,7 +967,7 @@ initInterface( const MeshFunctionPointer& _input,
    Devices::Cuda::synchronizeDevice();
    CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(),
            _output.template modifyData< Device >(),
            _interfaceMap.template modifyData< Device >() );
            _interfaceMap.template modifyData< Device >(), vLower, vUpper );
    cudaDeviceSynchronize();
    TNL_CHECK_CUDA_DEVICE;
#endif
@@ -979,8 +977,10 @@ initInterface( const MeshFunctionPointer& _input,
    const MeshFunctionType& input =  _input.getData();
    MeshFunctionType& output =  _output.modifyData();
    InterfaceMapType& interfaceMap =  _interfaceMap.modifyData();
    
    const MeshType& mesh = input.getMesh();
    typedef typename MeshType::Cell Cell;
    
    Cell cell( mesh );
    for( cell.getCoordinates().z() = 0;
            cell.getCoordinates().z() < mesh.getDimensions().z();
@@ -1002,14 +1002,14 @@ initInterface( const MeshFunctionPointer& _input,
    const RealType& hx = mesh.getSpaceSteps().x();
    const RealType& hy = mesh.getSpaceSteps().y();
    const RealType& hz = mesh.getSpaceSteps().z();
    for( cell.getCoordinates().z() = 0;
            cell.getCoordinates().z() < mesh.getDimensions().z();
    for( cell.getCoordinates().z() = 0 + vLower[2];
            cell.getCoordinates().z() < mesh.getDimensions().z() - vUpper[2];
            cell.getCoordinates().z() ++ )   
      for( cell.getCoordinates().y() = 0;
              cell.getCoordinates().y() < mesh.getDimensions().y();
      for( cell.getCoordinates().y() = 0 + vLower[1];
              cell.getCoordinates().y() < mesh.getDimensions().y() - vUpper[1];
              cell.getCoordinates().y() ++ )
        for( cell.getCoordinates().x() = 0;
                cell.getCoordinates().x() < mesh.getDimensions().x();
        for( cell.getCoordinates().x() = 0 + vLower[0];
                cell.getCoordinates().x() < mesh.getDimensions().x() - vUpper[0];
                cell.getCoordinates().x() ++ )
        {
          cell.refresh();
@@ -1062,10 +1062,6 @@ initInterface( const MeshFunctionPointer& _input,
              interfaceMap[ t ] = true;
            }  
          }
          /*output[ cell.getIndex() ] =
           c > 0 ? TypeInfo< RealType >::getMaxValue() :
           -TypeInfo< RealType >::getMaxValue();
           interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245
        }
  }
}
@@ -1075,7 +1071,7 @@ template< typename Real,
        typename Index >
template< typename MeshEntity >
__cuda_callable__
void
bool
tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >::
updateCell( MeshFunctionType& u,
        const MeshEntity& cell, 
@@ -1101,6 +1097,7 @@ updateCell( MeshFunctionType& u,
    a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ],
            u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] );
  }
  
  if( cell.getCoordinates().y() == 0 )
    b = u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ];
  else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 )
@@ -1109,7 +1106,9 @@ updateCell( MeshFunctionType& u,
  {
    b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ],
            u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] );
  }if( cell.getCoordinates().z() == 0 )
  }
  
  if( cell.getCoordinates().z() == 0 )
    c = u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ];
  else if( cell.getCoordinates().z() == mesh.getDimensions().z() - 1 )
    c = u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ];
@@ -1118,17 +1117,25 @@ updateCell( MeshFunctionType& u,
    c = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ],
            u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] );
  }
  
  if( fabs( a ) == std::numeric_limits< RealType >::max() && 
          fabs( b ) == std::numeric_limits< RealType >::max() &&
          fabs( c ) == std::numeric_limits< RealType >::max() )
    return;
    return false;
  
  RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz};
  sortMinims( pom );   
  tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ];
  
  if( fabs( tmp ) < fabs( pom[ 1 ] ) )
  {
    u[ cell.getIndex() ] = argAbsMin( value, tmp );
    tmp = value - u[ cell.getIndex() ];
    if ( fabs( tmp ) > 0.001*hx ){
      return true;
    }else{
      return false;
    }
  }
  else
  {
@@ -1138,6 +1145,12 @@ updateCell( MeshFunctionType& u,
    if( fabs( tmp ) < fabs( pom[ 2 ]) ) 
    {
      u[ cell.getIndex() ] = argAbsMin( value, tmp );
      tmp = value - u[ cell.getIndex() ];
      if ( fabs( tmp ) > 0.001*hx ){
        return true;
      }else{
        return false;
      }
    }
    else
    {
@@ -1146,6 +1159,12 @@ updateCell( MeshFunctionType& u,
              hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) -
              hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx );
      u[ cell.getIndex() ] = argAbsMin( value, tmp );
      tmp = value - u[ cell.getIndex() ];
      if ( fabs( tmp ) > 0.001*hx ){
        return true;
      }else{
        return false;
      }
    }
  }
}
@@ -1391,7 +1410,7 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
              - std::numeric_limits< Real >::max();
    interfaceMap[ cind ] = false; 
    
    if( i < mesh.getDimensions().x() - vUpper[0] && j < mesh.getDimensions().y() - vUpper[1] && i>vLower[0] && j> vLower[0] )
    if( i < mesh.getDimensions().x() - vUpper[0] && j < mesh.getDimensions().y() - vUpper[1] && i>vLower[0] -1 && j> vLower[0]-1 )
    {
      const Real& hx = mesh.getSpaceSteps().x();
      const Real& hy = mesh.getSpaceSteps().y();
@@ -1446,7 +1465,8 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, 
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output,
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap )
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
        Containers::StaticVector< 3, Index > vLower, Containers::StaticVector< 3, Index > vUpper )
{
  int i = threadIdx.x + blockDim.x*blockIdx.x;
  int j = blockDim.y*blockIdx.y + threadIdx.y;
@@ -1468,6 +1488,9 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3
    interfaceMap[ cind ] = false; 
    cell.refresh();
    
    if( i < mesh.getDimensions().x() - vUpper[0] && j < mesh.getDimensions().y() - vUpper[1] &&
            k < mesh.getDimensions().y() - vUpper[2] && i>vLower[0]-1 && j> vLower[1]-1 && k>vLower[2]-1 )
    {
      const Real& hx = mesh.getSpaceSteps().x();
      const Real& hy = mesh.getSpaceSteps().y();
      const Real& hz = mesh.getSpaceSteps().z();
@@ -1534,8 +1557,7 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3
      }
    }
  }


}


template< typename Real,
+1 −13
Original line number Diff line number Diff line
@@ -124,7 +124,6 @@ setInitialCondition( const Config::ParameterContainer& parameters,
  this->bindDofs( dofs );
  String inputFile = parameters.getParameter< String >( "input-file" );
  this->initialData->setMesh( this->getMesh() ); 
  std::cout<<"setInitialCondition" <<std::endl; 
  if( CommunicatorType::isDistributed() )
  {
    std::cout<<"Nodes Distribution: " << initialData->getMesh().getDistributedMesh()->printProcessDistr() << std::endl;
@@ -191,20 +190,9 @@ bool
tnlDirectEikonalProblem< Mesh, Communicator, Anisotropy, Real, Index >::
solve( DofVectorPointer& dofs )
{
   std::cout << "We are in solve()." << std::endl;
   FastSweepingMethod< MeshType, Communicator,AnisotropyType > fsm;
   fsm.solve( this->getMesh(), u, anisotropy, initialData );
   
   /*int i = Communicators::MpiCommunicator::GetRank( Communicators::MpiCommunicator::AllGroup );
   const MeshPointer msh = this->getMesh();
   if( i == 0 &&  msh->getMeshDimension() == 2 )
   {
     for( int k = 0; k < 9; k++ ){
       for( int l = 0; l < msh->getDimensions().x(); l++ )
         printf("%.2f\t",(*initialData)[ k * msh->getDimensions().x() + l ] );
       printf("\n");
     }
   }*/
   makeSnapshot();
   return true;
}
+4 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <TNL/Functions/Analytic/Constant.h>
#include <TNL/Pointers/SharedPointer.h>
#include "tnlDirectEikonalMethodsBase.h"
#define ForDebug false // false <=> off


template< typename Mesh,
@@ -132,8 +133,11 @@ class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Communicator,
    typedef Index IndexType;
    typedef Anisotropy AnisotropyType;
    typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType;
    typedef Communicator CommunicatorType;
    
    using MeshPointer = Pointers::SharedPointer<  MeshType >;
    using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >;
    using MPI = Communicators::MpiCommunicator;
    
    using typename BaseType::InterfaceMapType;
    using typename BaseType::MeshFunctionType;
Loading