/***************************************************************************
                          MpiCommunicator.h  -  description
                             -------------------
    begin                : 2005/04/23
    copyright            : (C) 2005 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <iostream>
#include <fstream>
#include <cstring>   // std::memcpy, std::strcmp (used by selectGPU)
#include <unistd.h>  // getpid (used by the GDB attach support in setup)

#ifdef HAVE_MPI
   #include <mpi.h>
   #ifdef OMPI_MAJOR_VERSION
      // header specific to OpenMPI (needed for CUDA-aware detection)
      #include <mpi-ext.h>
   #endif

   #ifdef HAVE_CUDA
      #include <TNL/Devices/Cuda.h>

      // packed record used to exchange processor names between ranks in selectGPU()
      typedef struct __attribute__((__packed__)) {
         char name[ MPI_MAX_PROCESSOR_NAME ];
      } procName;
   #endif
#endif

#include <TNL/Assert.h>   // TNL_ASSERT_TRUE, TNL_ASSERT_NE
#include <TNL/String.h>
#include <TNL/Logger.h>
#include <TNL/Communicators/MpiDefs.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Exceptions/MPISupportMissing.h>
#include <TNL/Communicators/MPITypeResolver.h>

namespace TNL {
namespace Communicators {
class MpiCommunicator
{

   public: // TODO: this was private
      /*inline static MPI_Datatype MPIDataType( const signed char* ) { return MPI_CHAR; };
      inline static MPI_Datatype MPIDataType( const signed short int* ) { return MPI_SHORT; };
      inline static MPI_Datatype MPIDataType( const signed int* ) { return MPI_INT; };
      inline static MPI_Datatype MPIDataType( const signed long int* ) { return MPI_LONG; };
      inline static MPI_Datatype MPIDataType( const unsigned char *) { return MPI_UNSIGNED_CHAR; };
      inline static MPI_Datatype MPIDataType( const unsigned short int* ) { return MPI_UNSIGNED_SHORT; };
      inline static MPI_Datatype MPIDataType( const unsigned int* ) { return MPI_UNSIGNED; };
      inline static MPI_Datatype MPIDataType( const unsigned long int* ) { return MPI_UNSIGNED_LONG; };
      inline static MPI_Datatype MPIDataType( const float* ) { return MPI_FLOAT; };
      inline static MPI_Datatype MPIDataType( const double* ) { return MPI_DOUBLE; };
      inline static MPI_Datatype MPIDataType( const long double* ) { return MPI_LONG_DOUBLE; };
      // TODO: tested with MPI_LOR and MPI_LAND, but there should probably be unit tests for all operations
      inline static MPI_Datatype MPIDataType( const bool* )
      {
         // sizeof(bool) is implementation-defined: https://stackoverflow.com/a/4897859
         static_assert( sizeof(bool) == 1, "The programmer did not count with systems where sizeof(bool) != 1." );
         return MPI_CHAR;
      };*/

#ifdef HAVE_MPI
      using Request = MPI_Request;
      using CommunicationGroup = MPI_Comm;
#else
      using Request = int;
      using CommunicationGroup = int;
#endif

      static bool isDistributed()
      {
#ifdef HAVE_MPI
         return GetSize( AllGroup ) > 1;
#else
         return false;
#endif
      }

      static void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
      {
         config.addEntry< bool >( "redirect-mpi-output", "Only process with rank 0 prints to console. Other processes are redirected to files.", true );
         config.addEntry< bool >( "mpi-gdb-debug", "Wait for GDB to attach the master MPI process.", false );
         config.addEntry< int >( "mpi-process-to-attach", "Number of the MPI process to be attached by GDB. Set -1 for all processes.", 0 );
      }
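
      // Example (hypothetical command line; assumes the usual TNL convention of passing
      // config entries as "--<entry-name> <value>" arguments):
      //
      //    mpirun -np 4 ./my-solver --redirect-mpi-output true --mpi-gdb-debug true --mpi-process-to-attach 0
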
      static bool setup( const Config::ParameterContainer& parameters,
                         const String& prefix = "" )
      {
#ifdef HAVE_MPI
         if( IsInitialized() )
         {
            redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
            setupRedirection();
#ifdef HAVE_CUDA
            int size;
            MPI_Comm_size( MPI_COMM_WORLD, &size );
            if( size > 1 )
            {
   #if defined( MPIX_CUDA_AWARE_SUPPORT ) && MPIX_CUDA_AWARE_SUPPORT
               std::cout << "CUDA-aware MPI detected on this system ... " << std::endl;
   #elif defined( MPIX_CUDA_AWARE_SUPPORT ) && !MPIX_CUDA_AWARE_SUPPORT
               std::cerr << "MPI is not CUDA-aware. Please install a CUDA-aware build of MPI." << std::endl;
               return false;
   #else
               std::cerr << "WARNING: TNL cannot detect if you have CUDA-aware MPI. Some problems may occur." << std::endl;
   #endif
            }
#endif // HAVE_CUDA
            bool gdbDebug = parameters.getParameter< bool >( "mpi-gdb-debug" );
            int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );

            if( gdbDebug )
            {
               int rank = GetRank( MPI_COMM_WORLD );
               int pid = getpid();

               volatile int tnlMPIDebugAttached = 0;
               MPI_Send( &pid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD );
               MPI_Barrier( MPI_COMM_WORLD );
               if( rank == 0 )
               {
                  std::cout << "Attach GDB to MPI process(es) by entering:" << std::endl;
                  for( int i = 0; i < GetSize( MPI_COMM_WORLD ); i++ )
                  {
                     MPI_Status status;
                     int recvPid;
                     MPI_Recv( &recvPid, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status );

                     if( i == processToAttach || processToAttach == -1 )
                     {
                        std::cout << "  For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\""
                                  << " -ex \"set variable tnlMPIDebugAttached=1\""
                                  << " -ex \"finish\"" << std::endl;
                     }
                  }
                  std::cout << std::flush;
               }
               if( rank == processToAttach || processToAttach == -1 )
                  while( ! tnlMPIDebugAttached );
               MPI_Barrier( MPI_COMM_WORLD );
            }
         }
#endif // HAVE_MPI
         return true;
      }

      static void Init( int& argc, char**& argv )
      {
#ifdef HAVE_MPI
         MPI_Init( &argc, &argv );
         // bind each rank to a GPU right after MPI is up (no-op without CUDA)
         selectGPU();
#endif
      }

      static void setRedirection( bool redirect_ )
      {
         redirect = redirect_;
      }
      static void setupRedirection()
      {
#ifdef HAVE_MPI
         if( isDistributed() && redirect )
         {
            // redirect all std::cout to files; only rank 0 keeps writing to the console
            backup = std::cout.rdbuf();

            if( GetRank( AllGroup ) != 0 )
            {
               std::cout << GetRank( AllGroup ) << ": Redirecting std::cout to file" << std::endl;
               const String stdoutFile = String( "./stdout-" ) + convertToString( GetRank( AllGroup ) ) + String( ".txt" );
               filestr.open( stdoutFile.getString() );
               psbuf = filestr.rdbuf();
               std::cout.rdbuf( psbuf );
            }
         }
#else
         throw Exceptions::MPISupportMissing();
#endif
      }

      static void Finalize()
      {
#ifdef HAVE_MPI
         if( isDistributed() )
         {
            // restore the original std::cout buffer before shutting down
            if( GetRank( AllGroup ) != 0 && redirect )
            {
               std::cout.rdbuf( backup );
               filestr.close();
            }
         }
         MPI_Finalize();
#else
         throw Exceptions::MPISupportMissing();
#endif
      }

      static bool IsInitialized()
      {
#ifdef HAVE_MPI
         int initialized, finalized;
         MPI_Initialized( &initialized );
         MPI_Finalized( &finalized );
         return initialized && ! finalized;
#else
         throw Exceptions::MPISupportMissing();
#endif
      }

      static int GetRank( CommunicationGroup group )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup");
         int rank;
         MPI_Comm_rank( group, &rank );
         return rank;
#else
         throw Exceptions::MPISupportMissing();
#endif
      }

      static int GetSize( CommunicationGroup group )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup");
         int size;
         MPI_Comm_size( group, &size );
         return size;
#else
         throw Exceptions::MPISupportMissing();
#endif
      }

#ifdef HAVE_MPI
      template< typename T >
      static MPI_Datatype getDataType( const T& t )
      { 
         return MPITypeResolver< T >::getType();
      };
#endif
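
      // Usage sketch: getDataType() maps a C++ value to the matching MPI datatype via
      // MPITypeResolver, e.g. (only meaningful when HAVE_MPI is defined):
      //
      //    double x = 0.0;
      //    MPI_Datatype dt = MpiCommunicator::getDataType( x );  // MPI_DOUBLE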
      
      // dim - number of dimensions
      // distr - array with an initial guess of the distribution; entries equal to 0 are
      //         computed and the array is overwritten with the resulting distribution
      // (see the MPI documentation of MPI_Dims_create for details)
      static void DimsCreate( int nproc, int dim, int* distr )
      {
#ifdef HAVE_MPI
         int sum = 0, prod = 1;
         for( int i = 0; i < dim; i++ )
         {
            sum += distr[ i ];
            prod *= distr[ i ];
         }
         if( prod != 0 && prod != GetSize( AllGroup ) )
            throw Exceptions::MPIDimsCreateError();
         if( sum == 0 )
         {
            // no guess given - fix all dimensions except the last one to 1 and
            // leave the last one as 0 so that MPI_Dims_create computes it
            for( int i = 0; i < dim - 1; i++ )
               distr[ i ] = 1;
            distr[ dim - 1 ] = 0;
         }
         MPI_Dims_create( nproc, dim, distr );
#else
         throw Exceptions::MPISupportMissing();
#endif
      }
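
      // Usage sketch (assuming 4 MPI processes and a 2D decomposition; zero entries
      // are filled in by MPI_Dims_create):
      //
      //    int distr[ 2 ] = { 0, 0 };
      //    MpiCommunicator::DimsCreate( MpiCommunicator::GetSize( MpiCommunicator::AllGroup ), 2, distr );
      //    // distr now holds e.g. { 2, 2 }
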
         static void Barrier( CommunicationGroup group )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
            TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup");
            MPI_Barrier( group );
#else
            throw Exceptions::MPISupportMissing();
#endif
         }

         template <typename T>
         static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
            TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup");
            Request req;
            MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req );
            return req;
#else
            throw Exceptions::MPISupportMissing();
#endif
         }

         template <typename T>
         static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
            TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup");
            Request req;
            MPI_Irecv( (void*) data, count, MPITypeResolver< T >::getType(), src, tag, group, &req );
            return req;
#else
            throw Exceptions::MPISupportMissing();
#endif
         }

         static void WaitAll( Request* reqs, int length )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
            MPI_Waitall( length, reqs, MPI_STATUSES_IGNORE );
#else
            throw Exceptions::MPISupportMissing();
#endif
         }
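
         // Usage sketch for the non-blocking ISend/IRecv pair with WaitAll (a minimal
         // exchange between two ranks; N and otherRank are illustrative placeholders):
         //
         //    double sendBuf[ N ], recvBuf[ N ];
         //    MpiCommunicator::Request reqs[ 2 ];
         //    reqs[ 0 ] = MpiCommunicator::ISend( sendBuf, N, otherRank, 0, MpiCommunicator::AllGroup );
         //    reqs[ 1 ] = MpiCommunicator::IRecv( recvBuf, N, otherRank, 0, MpiCommunicator::AllGroup );
         //    MpiCommunicator::WaitAll( reqs, 2 );
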
        template< typename T >
        static void Bcast( T* data, int count, int root, CommunicationGroup group )
        {
#ifdef HAVE_MPI
           TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
           TNL_ASSERT_NE(group, NullGroup, "Bcast cannot be called with NullGroup");
           MPI_Bcast( (void*) data, count, MPITypeResolver< T >::getType(), root, group );
#else
           throw Exceptions::MPISupportMissing();
#endif
        }

        template< typename T >
        static void Allreduce( const T* data,
                               T* reduced_data,
                               int count,
                               const MPI_Op &op,
                               CommunicationGroup group )
        {
#ifdef HAVE_MPI
            TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
            MPI_Allreduce( const_cast< void* >( ( const void* ) data ), (void*) reduced_data, count, MPITypeResolver< T >::getType(), op, group );
#else
            throw Exceptions::MPISupportMissing();
#endif
        }

        // in-place variant of Allreduce
        template< typename T >
        static void Allreduce( T* data,
                               int count,
                               const MPI_Op &op,
                               CommunicationGroup group)
        {
#ifdef HAVE_MPI
            TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
            MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group);
#else
            throw Exceptions::MPISupportMissing();
#endif
        }
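
        // Usage sketch for the in-place Allreduce: sum one value over all ranks
        // (MPI_SUM is a standard MPI reduction operation; localValue is illustrative):
        //
        //    double localValue = ...;  // e.g. a per-rank partial sum
        //    MpiCommunicator::Allreduce( &localValue, 1, MPI_SUM, MpiCommunicator::AllGroup );
        //    // localValue now holds the global sum on every rank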


         template< typename T >
         static void Reduce( const T* data,
                             T* reduced_data,
                             int count,
                             const MPI_Op &op,
                             int root,
                             CommunicationGroup group )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup");
            MPI_Reduce( const_cast< void* >( ( const void* ) data ), (void*) reduced_data, count, MPITypeResolver< T >::getType(), op, root, group );
#else
            throw Exceptions::MPISupportMissing();
#endif
         }

         template< typename T >
         static void SendReceive( const T* sendData,
                                  int sendCount,
                                  int destination,
                                  int sendTag,
                                  T* receiveData,
                                  int receiveCount,
                                  int source,
                                  int receiveTag,
                                  CommunicationGroup group )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
            MPI_Status status;
            MPI_Sendrecv( const_cast< void* >( ( const void* ) sendData ),
                          sendCount,
                          MPITypeResolver< T >::getType(),
                          destination,
                          sendTag,
                          ( void* ) receiveData,
                          receiveCount,
                          MPITypeResolver< T >::getType(),
                          source,
                          receiveTag,
                          group,
                          &status );
#else
            throw Exceptions::MPISupportMissing();
#endif
         }

         template< typename T >
         static void Alltoall( const T* sendData,
                               int sendCount,
                               T* receiveData,
                               int receiveCount,
                               CommunicationGroup group )
         {
#ifdef HAVE_MPI
            TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
            MPI_Alltoall( const_cast< void* >( ( void* ) sendData ),
                          MPITypeResolver< T >::getType(),
                          ( void* ) receiveData,
                          receiveCount,
                          MPITypeResolver< T >::getType(),
                          group );
#else
            throw Exceptions::MPISupportMissing();
#endif
         }

      static void writeProlog( Logger& logger )
      {
         if( isDistributed() )
         {
            logger.writeParameter( "MPI processes:", GetSize( AllGroup ) );
         }
      }

      static void CreateNewGroup( bool meToo, int myRank, CommunicationGroup &oldGroup, CommunicationGroup &newGroup )
      {
#ifdef HAVE_MPI
        if(meToo)
        {
            MPI_Comm_split(oldGroup, 1, myRank, &newGroup);
        }
        else
        {
            MPI_Comm_split(oldGroup, MPI_UNDEFINED, GetRank(oldGroup), &newGroup);
        }
#else
         throw Exceptions::MPISupportMissing();
#endif
      }
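
      // Usage sketch: split AllGroup into a subgroup of the even ranks only
      // (ranks that pass meToo == false end up with a null group):
      //
      //    MpiCommunicator::CommunicationGroup evenGroup;
      //    const int rank = MpiCommunicator::GetRank( MpiCommunicator::AllGroup );
      //    MpiCommunicator::CreateNewGroup( rank % 2 == 0, rank, MpiCommunicator::AllGroup, evenGroup );
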
#ifdef HAVE_MPI
      static MPI_Request NullRequest;
      static MPI_Comm AllGroup;
      static MPI_Comm NullGroup;
#else
      static constexpr int NullRequest = -1;
      static constexpr int AllGroup = 1;
      static constexpr int NullGroup = 0;
#endif
      static std::streambuf* psbuf;
      static std::streambuf* backup;
      static std::ofstream filestr;
      static bool redirect;

      // Select the CUDA device for this MPI process: ranks that share a node
      // (i.e. report the same processor name) are assigned GPUs round-robin.
      static void selectGPU( void )
      {
#ifdef HAVE_MPI
    #ifdef HAVE_CUDA
         const int count = GetSize(AllGroup);
         const int rank = GetRank(AllGroup);
         int gpuCount;
         cudaGetDeviceCount(&gpuCount);
         // exchange processor names so that each rank can count how many lower
         // ranks share its node
         procName names[ count ];
         int i=0;
         int len;
         MPI_Get_processor_name(names[rank].name, &len);
         for(i=0;i<count;i++)
            std::memcpy(names[i].name,names[rank].name,len+1);
         MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,
            (void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,
                     MPI_COMM_WORLD);
         int nodeRank=0;
         for(i=0;i<rank;i++)
         {
            if(std::strcmp(names[rank].name,names[i].name)==0)
               nodeRank++;
         }
         const int gpuNumber = nodeRank % gpuCount;
         cudaSetDevice(gpuNumber);
         TNL_CHECK_CUDA_DEVICE;
         //std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl;
    #endif
#endif
      }
};

#ifdef HAVE_MPI
MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL;
MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD;
MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL;
#endif
std::streambuf* MpiCommunicator::psbuf = nullptr;
std::streambuf* MpiCommunicator::backup = nullptr;
std::ofstream MpiCommunicator::filestr;
bool MpiCommunicator::redirect = true;
} // namespace Communicators
} // namespace TNL

#ifdef HAVE_MPI
#define TNL_MPI_PRINT( message )                                                                                                 \
if( ! TNL::Communicators::MpiCommunicator::IsInitialized() )                                                                     \
   std::cerr << message << std::endl;                                                                                            \
else                                                                                                                             \
   for( int __tnl_mpi_print_j = 0;                                                                                               \
        __tnl_mpi_print_j < TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup );       \
        __tnl_mpi_print_j++ )                                                                                                    \
   {                                                                                                                             \
      if( __tnl_mpi_print_j == TNL::Communicators::MpiCommunicator::GetRank( TNL::Communicators::MpiCommunicator::AllGroup ) )   \
      {                                                                                                                          \
         std::cerr << "Node " << __tnl_mpi_print_j << " of "                                                                     \
                   << TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup )              \
                   << " : " << message << std::endl << std::flush;                                                                    \
      }                                                                                                                          \
      TNL::Communicators::MpiCommunicator::Barrier( TNL::Communicators::MpiCommunicator::AllGroup );                             \
   }
#else
#define TNL_MPI_PRINT( message )                                                                                                 \
   std::cerr << message << std::endl;
#endif
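
// Usage sketch: TNL_MPI_PRINT streams the message from every rank in turn (prefixed
// with the rank number) when MPI is initialized, and falls back to a plain std::cerr
// print otherwise. The message may be a stream expression:
//
//    TNL_MPI_PRINT( "local mesh size = " << localSize );
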
#ifdef HAVE_MPI
#define TNL_MPI_PRINT_COND( condition, message )                                                                                 \
if( ! TNL::Communicators::MpiCommunicator::IsInitialized() )                                                                     \
{                                                                                                                                \
   if( condition ) std::cerr << message << std::endl;                                                                            \
}                                                                                                                                \
else                                                                                                                             \
{                                                                                                                                \
   for( int __tnl_mpi_print_j = 0;                                                                                               \
        __tnl_mpi_print_j < TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup );       \
        __tnl_mpi_print_j++ )                                                                                                    \
   {                                                                                                                             \
      if( __tnl_mpi_print_j == TNL::Communicators::MpiCommunicator::GetRank( TNL::Communicators::MpiCommunicator::AllGroup ) )   \
      {                                                                                                                          \
         if( condition )                                                                                                         \
            std::cerr << "Node " << __tnl_mpi_print_j << " of "                                                                  \
                      << TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup )           \
                      << " : " << message << std::endl << std::flush;                                                                          \
      }                                                                                                                          \
      TNL::Communicators::MpiCommunicator::Barrier( TNL::Communicators::MpiCommunicator::AllGroup );                             \
   }                                                                                                                             \
}
#else
#define TNL_MPI_PRINT_COND( condition, message )                                                                                 \
   if( condition ) std::cerr << message << std::endl;
#endif