Commit b8ae1e27 authored by Jakub Klinkovský
Browse files

MPI refactoring: moved setup and configSetup from MpiCommunicator into a separate header

parent 3ef7f564
Loading
Loading
Loading
Loading
+30 −105
Original line number Diff line number Diff line
@@ -10,26 +10,12 @@

#pragma once

#include <iostream>

#ifdef HAVE_MPI
#ifdef OMPI_MAJOR_VERSION
   // header specific to OpenMPI (needed for CUDA-aware detection)
   #include <mpi-ext.h>
#endif

#include <unistd.h>  // getpid
#endif

#include <TNL/String.h>
#include <TNL/Logger.h>
#include <TNL/MPI/Wrappers.h>
#include <TNL/MPI/DummyDefs.h>
#include <TNL/MPI/Utils.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/MPI/Config.h>
#include <TNL/Exceptions/MPIDimsCreateError.h>


namespace TNL {
//! \brief Namespace for TNL communicators.
namespace Communicators {
@@ -58,75 +44,13 @@ class MpiCommunicator

      /**
       * \brief Registers the MPI-related configuration entries.
       *
       * Thin forwarder to MPI::configSetup, which defines the entries
       * ("redirect-mpi-output", "redirect-mpi-output-dir", "mpi-gdb-debug",
       * "mpi-process-to-attach"). The entries must not be added here as well,
       * otherwise each of them would be registered twice.
       *
       * \param config  configuration description that receives the entries
       * \param prefix  forwarded unchanged to MPI::configSetup
       */
      static void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
      {
         MPI::configSetup( config, prefix );
      }

      /**
       * \brief Applies the MPI-related runtime parameters.
       *
       * Thin forwarder to MPI::setup, which handles the output redirection,
       * the CUDA-aware MPI check and the optional wait for GDB. Keeping a
       * second copy of that logic here would leave the forwarding call
       * unreachable behind an unconditional return.
       *
       * \param parameters  parsed parameters, forwarded to MPI::setup
       * \param prefix      forwarded unchanged to MPI::setup
       * \return the value returned by MPI::setup
       */
      static bool setup( const Config::ParameterContainer& parameters,
                         const String& prefix = "" )
      {
         return MPI::setup( parameters, prefix );
      }

      static void Init( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE )
@@ -157,32 +81,6 @@ class MpiCommunicator
         return MPI::GetSize( group );
      }

      // Computes the distribution of processes among the dimensions of a grid.
      //   nproc - number of processes passed to MPI_Dims_create
      //   dim   - number of dimensions, i.e. the number of entries of distr
      //   distr - on input the requested distribution (0 = to be computed),
      //           on output the resulting distribution
      // See MPI_Dims_create in the MPI documentation for more information.
      static void DimsCreate(int nproc, int dim, int *distr)
      {
#ifdef HAVE_MPI
         // accumulate the sum and the product of the requested sizes
         int total = 0;
         int product = 1;
         for( int d = 0; d < dim; d++ ) {
            total += distr[ d ];
            product *= distr[ d ];
         }

         // a request without any zero entry has to match the size of AllGroup
         if( product != 0 && product != GetSize( AllGroup ) )
            throw Exceptions::MPIDimsCreateError();

         // the entries sum to zero: fix all dimensions but the last one to 1
         // and let MPI_Dims_create compute the last one
         if( total == 0 ) {
            int d = 0;
            while( d < dim - 1 )
               distr[ d++ ] = 1;
            distr[ dim - 1 ] = 0;
         }

         MPI_Dims_create( nproc, dim, distr );
#else
         // MPI is not available: every dimension gets the size 1
         int d = 0;
         while( d < dim )
            distr[ d++ ] = 1;
#endif
      }

      //! \brief Forwards to MPI::Barrier for the given communication group (AllGroup by default).
      static void Barrier( CommunicationGroup group = AllGroup )
      {
         MPI::Barrier( group );
      }
@@ -278,6 +176,33 @@ class MpiCommunicator
         MPI::Alltoall( sendData, sendCount, receiveData, receiveCount, group );
      }


      // Computes the distribution of processes among the dimensions of a grid.
      //   nproc - number of processes passed to MPI_Dims_create
      //   dim   - number of dimensions, i.e. the number of entries of distr
      //   distr - on input the requested distribution (0 = to be computed),
      //           on output the resulting distribution
      // Throws Exceptions::MPIDimsCreateError when the request contains no zero
      // entry and its product differs from the size of AllGroup.
      // See MPI_Dims_create in the MPI documentation for more information.
      static void DimsCreate(int nproc, int dim, int *distr)
      {
#ifdef HAVE_MPI
         int sum = 0, prod = 1;
         for( int i = 0; i < dim; i++ ) {
            sum += distr[ i ];
            prod *= distr[ i ];
         }
         if( prod != 0 && prod != GetSize( AllGroup ) )
            throw Exceptions::MPIDimsCreateError();
         // The entries sum to zero: fix all dimensions but the last one to 1
         // and let MPI_Dims_create compute the last one. The check of dim
         // prevents writing to distr[ dim - 1 ] when there is no such entry.
         if( sum == 0 && dim > 0 ) {
            for( int i = 0; i < dim - 1; i++ )
               distr[ i ] = 1;
            distr[ dim - 1 ] = 0;
         }

         MPI_Dims_create( nproc, dim, distr );
#else
         // MPI is not available: every dimension gets the size 1
         for( int i = 0; i < dim; i++ )
            distr[ i ] = 1;
#endif
      }

      static void CreateNewGroup( bool meToo, int myRank, CommunicationGroup &oldGroup, CommunicationGroup &newGroup )
      {
#ifdef HAVE_MPI
+1 −0
Original line number Diff line number Diff line
@@ -26,4 +26,5 @@
#include "MPI/Wrappers.h"
#include "MPI/Utils.h"
#include "MPI/ScopedInitializer.h"
#include "MPI/Config.h"
#include "MPI/Print.h"

src/TNL/MPI/Config.h

0 → 100644
+103 −0
Original line number Diff line number Diff line
/***************************************************************************
                          MPI/Config.h  -  description
                             -------------------
    begin                : Apr 23, 2005
    copyright            : (C) 2005 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <iostream>

#ifdef HAVE_MPI
#ifdef OMPI_MAJOR_VERSION
   // header specific to OpenMPI (needed for CUDA-aware detection)
   #include <mpi-ext.h>
#endif

#include <unistd.h>  // getpid
#endif

#include <TNL/Config/ConfigDescription.h>
#include <TNL/Config/ParameterContainer.h>
#include "Utils.h"

namespace TNL {
namespace MPI {

/**
 * \brief Registers the configuration entries that control the MPI setup.
 *
 * With HAVE_MPI defined, four entries are added: "redirect-mpi-output",
 * "redirect-mpi-output-dir", "mpi-gdb-debug" and "mpi-process-to-attach".
 * Without HAVE_MPI the function does nothing.
 *
 * \param config  configuration description that receives the entries
 * \param prefix  not used by this function; the entry names are registered
 *                without any prefix
 */
inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
{
#ifdef HAVE_MPI
   // output redirection
   config.addEntry< bool >(
      "redirect-mpi-output",
      "Only process with rank 0 prints to console. Other processes are redirected to files.",
      true );
   config.addEntry< String >(
      "redirect-mpi-output-dir",
      "Directory where ranks will store the files if their output is redirected.",
      "." );

   // debugging with GDB
   config.addEntry< bool >(
      "mpi-gdb-debug",
      "Wait for GDB to attach the master MPI process.",
      false );
   config.addEntry< int >(
      "mpi-process-to-attach",
      "Number of the MPI process to be attached by GDB. Set -1 for all processes.",
      0 );
#endif
}

/**
 * \brief Applies the MPI-related parameters registered by configSetup.
 *
 * Nothing is done unless HAVE_MPI is defined and MPI is initialized and not
 * yet finalized. Otherwise the function
 *  - calls MPI::setupRedirection when "redirect-mpi-output" is set,
 *  - with HAVE_CUDA and more than one process, reports whether the MPI library
 *    is CUDA-aware (based on MPIX_CUDA_AWARE_SUPPORT),
 *  - when "mpi-gdb-debug" is set, prints the GDB commands for attaching to the
 *    processes selected by "mpi-process-to-attach" and blocks those processes
 *    until the debugger sets the local variable tnlMPIDebugAttached.
 *
 * \param parameters  container with the parsed parameter values
 * \param prefix      not used by this function; the parameters are looked up
 *                    without any prefix
 * \return false when MPIX_CUDA_AWARE_SUPPORT reports an MPI library which is
 *         not CUDA-aware (only with HAVE_CUDA and more than one process),
 *         true otherwise
 */
inline bool setup( const Config::ParameterContainer& parameters,
                   const String& prefix = "" )
{
#ifdef HAVE_MPI
   if( Initialized() && ! Finalized() )
   {
      const bool redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
      const String outputDirectory = parameters.getParameter< String >( "redirect-mpi-output-dir" );
      if( redirect )
         MPI::setupRedirection( outputDirectory );
#ifdef HAVE_CUDA
      if( GetSize() > 1 )
      {
#if defined( MPIX_CUDA_AWARE_SUPPORT ) && MPIX_CUDA_AWARE_SUPPORT
         std::cout << "CUDA-aware MPI detected on this system ... " << std::endl;
#elif defined( MPIX_CUDA_AWARE_SUPPORT ) && !MPIX_CUDA_AWARE_SUPPORT
         std::cerr << "MPI is not CUDA-aware. Please install correct version of MPI." << std::endl;
         return false;
#else
         std::cerr << "WARNING: TNL cannot detect if you have CUDA-aware MPI. Some problems may occur." << std::endl;
#endif
      }
#endif // HAVE_CUDA
      const bool gdbDebug = parameters.getParameter< bool >( "mpi-gdb-debug" );
      const int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );

      if( gdbDebug )
      {
         const int rank = GetRank( MPI_COMM_WORLD );
         int pid = getpid();

         // The flag is flipped from outside by the printed GDB command
         // "set variable tnlMPIDebugAttached=1"; volatile keeps the compiler
         // from optimizing the polling loop below away.
         volatile int tnlMPIDebugAttached = 0;

         // Every rank (including rank 0 itself) reports its PID to rank 0.
         // The send must be non-blocking: the matching receives are posted
         // only after the barrier, so a blocking MPI_Send would rely on
         // buffering inside the MPI library and could deadlock.
         MPI_Request sendRequest;
         MPI_Isend( &pid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &sendRequest );
         MPI_Barrier( MPI_COMM_WORLD );
         if( rank == 0 )
         {
            const int size = GetSize();
            std::cout << "Attach GDB to MPI process(es) by entering:" << std::endl;
            for( int i = 0; i < size; i++ )
            {
               MPI_Status status;
               int recvPid;
               MPI_Recv( &recvPid, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status );

               if( i == processToAttach || processToAttach == -1 )
               {
                  std::cout << "  For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\""
                            << " -ex \"set variable tnlMPIDebugAttached=1\""
                            << " -ex \"continue\"" << std::endl;
               }
            }
            std::cout << std::flush;
         }
         // complete the send before waiting for the debugger
         MPI_Wait( &sendRequest, MPI_STATUS_IGNORE );

         // NOTE(review): the wait is presumably a plain busy loop without any
         // function call so that the variable is in scope of the frame GDB
         // selects after attaching - confirm before adding a sleep here.
         if( rank == processToAttach || processToAttach == -1 )
            while( ! tnlMPIDebugAttached );
         MPI_Barrier( MPI_COMM_WORLD );
      }
   }
#endif // HAVE_MPI
   return true;
}

} // namespace MPI
} // namespace TNL