Commit 9a88469e authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

MPI refactoring: split MpiCommunicator into plain functions in the TNL::MPI namespace

parent cff4ab33
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Communicators/ScopedInitializer.h>
#include <TNL/MPI/ScopedInitializer.h>
#include <TNL/Containers/Partitioner.h>
#include <TNL/Containers/DistributedVector.h>
#include <TNL/Matrices/DistributedMatrix.h>
@@ -309,7 +309,7 @@ main( int argc, char* argv[] )

   configSetup( conf_desc );

   Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv);
   TNL::MPI::ScopedInitializer mpi(argc, argv);
   const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup );

   if( ! parseCommandLine( argc, argv, conf_desc, parameters ) )
+2 −2
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Communicators/ScopedInitializer.h>
#include <TNL/MPI/ScopedInitializer.h>
#include <TNL/Containers/Partitioner.h>
#include <TNL/Containers/DistributedVector.h>
#include <TNL/Matrices/DistributedMatrix.h>
@@ -592,7 +592,7 @@ main( int argc, char* argv[] )

   configSetup( conf_desc );

   Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv);
   TNL::MPI::ScopedInitializer mpi(argc, argv);
   const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup );

   if( ! parseCommandLine( argc, argv, conf_desc, parameters ) )
+5 −5
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Communicators/ScopedInitializer.h>
#include <TNL/MPI/ScopedInitializer.h>
#include <TNL/Solvers/ODE/Euler.h>
#include <TNL/Solvers/ODE/Merson.h>

@@ -225,7 +225,7 @@ main( int argc, char* argv[] )

   configSetup( conf_desc );

   Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv);
   TNL::MPI::ScopedInitializer mpi(argc, argv);
   const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup );

   if( ! parseCommandLine( argc, argv, conf_desc, parameters ) )
+23 −250
Original line number Diff line number Diff line
@@ -11,36 +11,23 @@
#pragma once

#include <iostream>
#include <fstream>
#include <cstring>

#ifdef HAVE_MPI
#include <mpi.h>
#ifdef OMPI_MAJOR_VERSION
   // header specific to OpenMPI (needed for CUDA-aware detection)
   #include <mpi-ext.h>
#endif

#include <unistd.h>  // getpid

#ifdef HAVE_CUDA
    #include <TNL/Cuda/CheckDevice.h>

    typedef struct __attribute__((__packed__))  {
       char name[MPI_MAX_PROCESSOR_NAME];
    } procName;
#endif

#endif

#include <TNL/String.h>
#include <TNL/Logger.h>
#include <TNL/Debugging/OutputRedirection.h>
#include <TNL/Communicators/MpiDefs.h>
#include <TNL/MPI/Wrappers.h>
#include <TNL/MPI/DummyDefs.h>
#include <TNL/MPI/Utils.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Exceptions/MPISupportMissing.h>
#include <TNL/Exceptions/MPIDimsCreateError.h>
#include <TNL/Communicators/MPITypeResolver.h>


namespace TNL {
@@ -88,7 +75,7 @@ class MpiCommunicator
            const bool redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
            const String outputDirectory = parameters.getParameter< String >( "redirect-mpi-output-dir" );
            if( redirect )
               setupRedirection( outputDirectory );
               MPI::setupRedirection( outputDirectory );
#ifdef HAVE_CUDA
            int size;
            MPI_Comm_size( MPI_COMM_WORLD, &size );
@@ -144,125 +131,32 @@ class MpiCommunicator

      static void Init( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE )
      {
#ifdef HAVE_MPI
         switch( required_thread_level ) {
            case MPI_THREAD_SINGLE:
            case MPI_THREAD_FUNNELED:
            case MPI_THREAD_SERIALIZED:
            case MPI_THREAD_MULTIPLE:
               break;
            default:
               printf("ERROR: invalid argument for the 'required' thread level support: %d\n", required_thread_level);
               MPI_Abort(MPI_COMM_WORLD, 1);
         }

         int provided;
         MPI_Init_thread( &argc, &argv, required_thread_level, &provided );
         if( provided < required_thread_level ) {
            const char* level = "";
            switch( required_thread_level ) {
               case MPI_THREAD_SINGLE:
                  level = "MPI_THREAD_SINGLE";
                  break;
               case MPI_THREAD_FUNNELED:
                  level = "MPI_THREAD_FUNNELED";
                  break;
               case MPI_THREAD_SERIALIZED:
                  level = "MPI_THREAD_SERIALIZED";
                  break;
               case MPI_THREAD_MULTIPLE:
                  level = "MPI_THREAD_MULTIPLE";
                  break;
            }
            printf("ERROR: The MPI library does not have the required level of thread support: %s\n", level);
            MPI_Abort(MPI_COMM_WORLD, 1);
         }

         selectGPU();
#endif
         MPI::Init( argc, argv, required_thread_level );

         // silence warnings about (potentially) unused variables
         (void) NullGroup;
         (void) NullRequest;
      }

      static void setupRedirection( std::string outputDirectory )
      {
#ifdef HAVE_MPI
         if(isDistributed() )
         {
            if(GetRank(AllGroup)!=0)
            {
               const std::string stdoutFile = outputDirectory + "/stdout_" + std::to_string(GetRank(AllGroup)) + ".txt";
               const std::string stderrFile = outputDirectory + "/stderr_" + std::to_string(GetRank(AllGroup)) + ".txt";
               std::cout << GetRank(AllGroup) << ": Redirecting stdout and stderr to files " << stdoutFile << " and " << stderrFile << std::endl;
               Debugging::redirect_stdout_stderr( stdoutFile, stderrFile );
            }
         }
#endif
      }

      static void Finalize()
      {
#ifdef HAVE_MPI
         if(isDistributed())
         {
            if(GetRank(AllGroup)!=0)
            {
               // restore redirection (not necessary, it uses RAII internally...)
               Debugging::redirect_stdout_stderr( "", "", true );
            }
         }
         MPI_Finalize();
#endif
         MPI::Finalize();
      }

      static bool IsInitialized()
      {
#ifdef HAVE_MPI
         int initialized, finalized;
         MPI_Initialized(&initialized);
         MPI_Finalized(&finalized);
         return initialized && !finalized;
#else
         return true;
#endif
         return MPI::isInitialized();
      }

      static int GetRank(CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup");
         int rank;
         MPI_Comm_rank(group,&rank);
         return rank;
#else
         return 0;
#endif
         return MPI::GetRank( group );
      }

      static int GetSize(CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup");
         int size;
         MPI_Comm_size(group,&size);
         return size;
#else
         return 1;
#endif
         return MPI::GetSize( group );
      }

#ifdef HAVE_MPI
      template< typename T >
      static MPI_Datatype getDataType( const T& t )
      {
         return MPITypeResolver< T >::getType();
      }
#endif

      //dim-number of dimensions, distr array of guess distr - 0 for computation
      //distr array will be filled by computed distribution
      //more information in MPI documentation
@@ -291,78 +185,42 @@ class MpiCommunicator

      static void Barrier( CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup");
         MPI_Barrier(group);
#endif
         MPI::Barrier( group );
      }

      template <typename T>
      static void Send( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "Send cannot be called with NullGroup");
         MPI_Send( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group );
#endif
         MPI::Send( data, count, dest, tag, group );
      }

      template <typename T>
      static void Recv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "Recv cannot be called with NullGroup");
         MPI_Status status;
         MPI_Recv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &status );
#endif
         MPI::Recv( data, count, src, tag, group );
      }

      template <typename T>
      static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup");
         Request req;
         MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req);
         return req;
#else
         return 1;
#endif
         return MPI::Isend( data, count, dest, tag, group );
      }

      template <typename T>
      static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup");
         Request req;
         MPI_Irecv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &req);
         return req;
#else
         return 1;
#endif
         return MPI::Irecv( data, count, src, tag, group );
      }

      static void WaitAll(Request *reqs, int length)
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE);
#endif
         MPI::Waitall( reqs, length );
      }

      template< typename T >
      static void Bcast( T* data, int count, int root, CommunicationGroup group)
      {
#ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
         TNL_ASSERT_NE(group, NullGroup, "BCast cannot be called with NullGroup");
         MPI_Bcast((void*) data, count, MPITypeResolver< T >::getType(), root, group);
#endif
         MPI::Bcast( data, count, root, group );
      }

      template< typename T >
@@ -372,12 +230,7 @@ class MpiCommunicator
                             const MPI_Op &op,
                             CommunicationGroup group)
      {
#ifdef HAVE_MPI
         TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
         MPI_Allreduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,group);
#else
         memcpy( ( void* ) reduced_data, ( const void* ) data, count * sizeof( T ) );
#endif
         MPI::Allreduce( data, reduced_data, count, op, group );
      }

      // in-place variant of Allreduce
@@ -387,27 +240,18 @@ class MpiCommunicator
                             const MPI_Op &op,
                             CommunicationGroup group)
      {
#ifdef HAVE_MPI
         TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
         MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group);
#endif
         MPI::Allreduce( data, count, op, group );
      }


      template< typename T >
      static void Reduce( const T* data,
                          T* reduced_data,
                          int count,
                          MPI_Op &op,
                          const MPI_Op &op,
                          int root,
                          CommunicationGroup group)
      {
#ifdef HAVE_MPI
         TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup");
         MPI_Reduce( const_cast< void* >( ( void*) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,root,group);
#else
         memcpy( ( void* ) reduced_data, ( void* ) data, count * sizeof( T ) );
#endif
         MPI::Reduce( data, reduced_data, count, op, root, group );
      }

      template< typename T >
@@ -421,24 +265,7 @@ class MpiCommunicator
                               int receiveTag,
                               CommunicationGroup group )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
         MPI_Status status;
         MPI_Sendrecv( const_cast< void* >( ( void* ) sendData ),
                       sendCount,
                       MPITypeResolver< T >::getType(),
                       destination,
                       sendTag,
                       ( void* ) receiveData,
                       receiveCount,
                       MPITypeResolver< T >::getType(),
                       source,
                       receiveTag,
                       group,
                       &status );
#else
         throw Exceptions::MPISupportMissing();
#endif
         MPI::Sendrecv( sendData, sendCount, destination, sendTag, receiveData, receiveCount, source, receiveTag, group );
      }

      template< typename T >
@@ -448,19 +275,7 @@ class MpiCommunicator
                            int receiveCount,
                            CommunicationGroup group )
      {
#ifdef HAVE_MPI
         TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
         MPI_Alltoall( const_cast< void* >( ( void* ) sendData ),
                       sendCount,
                       MPITypeResolver< T >::getType(),
                       ( void* ) receiveData,
                       receiveCount,
                       MPITypeResolver< T >::getType(),
                       group );
#else
         TNL_ASSERT_EQ( sendCount, receiveCount, "sendCount must be equal to receiveCount when running without MPI." );
         memcpy( (void*) receiveData, (const void*) sendData, sendCount * sizeof( T ) );
#endif
         MPI::Alltoall( sendData, sendCount, receiveData, receiveCount, group );
      }


@@ -485,58 +300,16 @@ class MpiCommunicator
      }

#ifdef HAVE_MPI
      static MPI_Request NullRequest;
      static MPI_Comm AllGroup;
      static MPI_Comm NullGroup;
#else
      static constexpr int NullRequest = -1;
      static constexpr int AllGroup = 1;
      static constexpr int NullGroup = 0;
#endif
   private:

      static void selectGPU(void)
      {
#ifdef HAVE_MPI
    #ifdef HAVE_CUDA
         const int count = GetSize(AllGroup);
         const int rank = GetRank(AllGroup);
         int gpuCount;
         cudaGetDeviceCount(&gpuCount);

         procName names[count];

         int i=0;
         int len;
         MPI_Get_processor_name(names[rank].name, &len);

         for(i=0;i<count;i++)
            std::memcpy(names[i].name,names[rank].name,len+1);

         MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,
            (void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,
                     MPI_COMM_WORLD);

         int nodeRank=0;
         for(i=0;i<rank;i++)
         {
            if(std::strcmp(names[rank].name,names[i].name)==0)
               nodeRank++;
         }

         const int gpuNumber = nodeRank % gpuCount;

         cudaSetDevice(gpuNumber);
         TNL_CHECK_CUDA_DEVICE;

         //std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl;
    #endif
#endif
      }
};

#ifdef HAVE_MPI
MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL;
MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD;
MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL;
#endif
+0 −1
Original line number Diff line number Diff line
@@ -15,7 +15,6 @@
#include "DistributedArray.h"

#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Communicators/MpiDefs.h>  // important only when MPI is disabled

namespace TNL {
namespace Containers {
Loading