/***************************************************************************
MpiCommunicator.h - description
-------------------
begin : 2005/04/23
copyright : (C) 2005 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#pragma once
#include <iostream>
#include <fstream>

#include <cstring>

#ifdef HAVE_MPI
#include <mpi.h>
#ifdef OMPI_MAJOR_VERSION
// header specific to OpenMPI (needed for CUDA-aware detection)
#include <mpi-ext.h>
#endif
#include <unistd.h> // getpid
#ifdef HAVE_CUDA
#include <TNL/Devices/Cuda.h>
typedef struct __attribute__((__packed__)) {
char name[MPI_MAX_PROCESSOR_NAME];
} procName;
#endif
#endif // HAVE_MPI
#include <TNL/Communicators/MpiDefs.h>
#include <TNL/Exceptions/MPISupportMissing.h>
#include <TNL/Exceptions/MPIDimsCreateError.h>
#include <TNL/Communicators/MPITypeResolver.h>

namespace TNL {
namespace Communicators {
class MpiCommunicator
{
public: // TODO: this was private
/*inline static MPI_Datatype MPIDataType( const signed char* ) { return MPI_CHAR; };
inline static MPI_Datatype MPIDataType( const signed short int* ) { return MPI_SHORT; };
inline static MPI_Datatype MPIDataType( const signed int* ) { return MPI_INT; };
inline static MPI_Datatype MPIDataType( const signed long int* ) { return MPI_LONG; };
inline static MPI_Datatype MPIDataType( const unsigned char *) { return MPI_UNSIGNED_CHAR; };
inline static MPI_Datatype MPIDataType( const unsigned short int* ) { return MPI_UNSIGNED_SHORT; };
inline static MPI_Datatype MPIDataType( const unsigned int* ) { return MPI_UNSIGNED; };
inline static MPI_Datatype MPIDataType( const unsigned long int* ) { return MPI_UNSIGNED_LONG; };
inline static MPI_Datatype MPIDataType( const float* ) { return MPI_FLOAT; };
inline static MPI_Datatype MPIDataType( const double* ) { return MPI_DOUBLE; };
inline static MPI_Datatype MPIDataType( const long double* ) { return MPI_LONG_DOUBLE; };
// TODO: tested with MPI_LOR and MPI_LAND, but there should probably be unit tests for all operations
inline static MPI_Datatype MPIDataType( const bool* )
{
// sizeof(bool) is implementation-defined: https://stackoverflow.com/a/4897859
static_assert( sizeof(bool) == 1, "The programmer did not count with systems where sizeof(bool) != 1." );
return MPI_CHAR;
}*/

#ifdef HAVE_MPI
using Request = MPI_Request;
using CommunicationGroup = MPI_Comm;
#else
using Request = int;
using CommunicationGroup = int;
#endif

static bool isDistributed()
{
#ifdef HAVE_MPI
return GetSize(AllGroup)>1;
#else
return false;
#endif
}
static void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
{
#ifdef HAVE_MPI
config.addEntry< bool >( "redirect-mpi-output", "Only the process with rank 0 prints to the console; the output of other processes is redirected to files.", true );
config.addEntry< bool >( "mpi-gdb-debug", "Wait for GDB to attach to the selected MPI process.", false );
config.addEntry< int >( "mpi-process-to-attach", "Rank of the MPI process to be attached by GDB. Set -1 for all processes.", 0 );
#endif
}
static bool setup( const Config::ParameterContainer& parameters,
const String& prefix = "" )
{
#ifdef HAVE_MPI

if(IsInitialized())//i.e. - isUsed
{
redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
if( redirect )
setupRedirection();
#ifdef HAVE_CUDA
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
std::cout << "CUDA-aware MPI detected on this system ... " << std::endl;
#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT
std::cerr << "MPI is not CUDA-aware. Please install correct version of MPI." << std::endl;
return false;
#else
std::cerr << "WARNING: TNL cannot detect if you have CUDA-aware MPI. Some problems may occur." << std::endl;
#endif
#endif // HAVE_CUDA
bool gdbDebug = parameters.getParameter< bool >( "mpi-gdb-debug" );
int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );
if( gdbDebug )
{
int rank = GetRank( MPI_COMM_WORLD );
int pid = getpid();
volatile int tnlMPIDebugAttached = 0;
MPI_Send( &pid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD );
MPI_Barrier( MPI_COMM_WORLD );
if( rank == 0 )
{
std::cout << "Attach GDB to MPI process(es) by entering:" << std::endl;
for( int i = 0; i < GetSize( MPI_COMM_WORLD ); i++ )
{
MPI_Status status;
int recvPid;
MPI_Recv( &recvPid, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status );
if( i == processToAttach || processToAttach == -1 )
{
std::cout << " For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\""
<< " -ex \"set variable tnlMPIDebugAttached=1\""
<< " -ex \"finish\"" << std::endl;
}
}
std::cout << std::flush;
}
if( rank == processToAttach || processToAttach == -1 )
while( ! tnlMPIDebugAttached );
MPI_Barrier( MPI_COMM_WORLD );
}
}
#endif // HAVE_MPI
return true;
}
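// Usage note (illustrative, not part of the original interface): to debug a run with GDB,
// pass the options registered in configSetup(), e.g. (launcher and executable name are placeholders)
//
//    mpirun -np 4 ./your-app --mpi-gdb-debug true --mpi-process-to-attach 0
//
// then run the "gdb -q -ex ..." command printed by rank 0 in another terminal; setting the
// variable tnlMPIDebugAttached from GDB releases the waiting process.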
static void Init(int& argc, char**& argv )
{
#ifdef HAVE_MPI
MPI_Init( &argc, &argv );

selectGPU();
#endif
}
static void setRedirection( bool redirect_ )
{
redirect = redirect_;
}
static void setupRedirection()
{
#ifdef HAVE_MPI
//redirect all stdout to files, only rank 0 keeps writing to the console
backup=std::cout.rdbuf();

//redirect output to files...
if(GetRank(AllGroup)!=0)
{
std::cout << GetRank(AllGroup) << ": Redirecting std::cout to file" << std::endl;
const String stdoutFile = String("./stdout-") + convertToString(GetRank(AllGroup)) + String(".txt");
filestr.open(stdoutFile.getString());
psbuf = filestr.rdbuf();
std::cout.rdbuf(psbuf);
}
#else
throw Exceptions::MPISupportMissing();
#endif
}

static void Finalize()
{
#ifdef HAVE_MPI
if(GetRank(AllGroup)!=0)
{
// restore the original stream buffer and close the per-rank output file
std::cout.rdbuf(backup);
filestr.close();
}
MPI_Finalize();
#endif
}
static bool IsInitialized()
{
#ifdef HAVE_MPI
int initialized, finalized;
MPI_Initialized(&initialized);
MPI_Finalized(&finalized);
return initialized && !finalized;
#else
throw Exceptions::MPISupportMissing();
#endif
}
static int GetRank(CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup");
int rank;
MPI_Comm_rank(group,&rank);
return rank;
#else
throw Exceptions::MPISupportMissing();
#endif
}
static int GetSize(CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup");
int size;
MPI_Comm_size(group,&size);
return size;
#else
throw Exceptions::MPISupportMissing();
#endif
}
#ifdef HAVE_MPI
template< typename T >
static MPI_Datatype getDataType( const T& t )
{
return MPITypeResolver< T >::getType();
};
#endif
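// For instance, getDataType( 3.14 ) is expected to resolve to MPI_DOUBLE and
// getDataType( 42 ) to MPI_INT, as mapped by MPITypeResolver.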
// nproc - number of MPI processes, dim - number of dimensions
// distr - array with a suggested distribution; entries set to 0 are computed
// on return, distr holds the computed distribution (see MPI_Dims_create in the MPI documentation)
static void DimsCreate(int nproc, int dim, int *distr)
{
#ifdef HAVE_MPI
int sum = 0, prod = 1;
for( int i = 0;i < dim; i++ )
{
sum += distr[ i ];
prod *= distr[ i ];
}
if( prod != 0 && prod != GetSize( AllGroup ) )
throw Exceptions::MPIDimsCreateError();
// no guess was given - fall back to a linear distribution along the last dimension
if( sum == 0 )
{
for(int i=0;i<dim-1;i++)
{
distr[i]=1;
}
distr[dim-1]=0;
}
MPI_Dims_create(nproc, dim, distr);
#else
throw Exceptions::MPISupportMissing();
#endif
}
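// Example (sketch): with nproc = 4, dim = 2 and distr = { 2, 0 }, the fixed entry is kept
// and the zero entry is computed, so distr becomes { 2, 2 }; a distr of all zeros is first
// turned into a linear distribution along the last dimension (see above).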
static void Barrier(CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup");
MPI_Barrier(group);
#else
throw Exceptions::MPISupportMissing();
#endif
}
template< typename T >
static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup");
Request req;
MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req);
return req;
#else
throw Exceptions::MPISupportMissing();
#endif
}
template< typename T >
static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup");
Request req;
MPI_Irecv((void*) data, count, MPITypeResolver< T >::getType(), src, tag, group, &req);
return req;
#else
throw Exceptions::MPISupportMissing();
#endif
}
static void WaitAll(Request *reqs, int length)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE);
#else
throw Exceptions::MPISupportMissing();
#endif
}
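// Illustrative sketch of the non-blocking pattern built from ISend/IRecv/WaitAll
// (N and partner are placeholders for a buffer size and a neighbouring rank):
//
//    double sendBuf[ N ], recvBuf[ N ];
//    MpiCommunicator::Request reqs[ 2 ];
//    reqs[ 0 ] = MpiCommunicator::ISend( sendBuf, N, partner, 0, MpiCommunicator::AllGroup );
//    reqs[ 1 ] = MpiCommunicator::IRecv( recvBuf, N, partner, 0, MpiCommunicator::AllGroup );
//    MpiCommunicator::WaitAll( reqs, 2 );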
template< typename T >
static void Bcast( T* data, int count, int root, CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "Bcast cannot be called with NullGroup");
MPI_Bcast((void*) data, count, MPITypeResolver< T >::getType(), root, group);
#else
throw Exceptions::MPISupportMissing();
#endif
}
template< typename T >
static void Allreduce( const T* data,
T* reduced_data,
int count,
const MPI_Op &op,
CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
MPI_Allreduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,group);
#else
throw Exceptions::MPISupportMissing();
#endif
}
// in-place variant of Allreduce
template< typename T >
static void Allreduce( T* data,
int count,
const MPI_Op &op,
CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group);
#else
throw Exceptions::MPISupportMissing();
#endif
}
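// Example (sketch): global sum of a per-rank value using the in-place variant;
// localCount is a placeholder for any local integer.
//
//    int localCount = ...;
//    MpiCommunicator::Allreduce( &localCount, 1, MPI_SUM, MpiCommunicator::AllGroup );
//    // localCount now holds the sum over all ranks of the group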
template< typename T >
static void Reduce( const T* data,
T* reduced_data,
int count,
MPI_Op &op,
int root,
CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup");
MPI_Reduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,root,group);
#else
throw Exceptions::MPISupportMissing();
#endif
}
template< typename T >
static void SendReceive( const T* sendData,
int sendCount,
int destination,
int sendTag,
T* receiveData,
int receiveCount,
int source,
int receiveTag,
CommunicationGroup group )
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
MPI_Status status;
MPI_Sendrecv( const_cast< void* >( ( void* ) sendData ),
sendCount,
MPITypeResolver< T >::getType(),
destination,
sendTag,
( void* ) receiveData,
receiveCount,
MPITypeResolver< T >::getType(),
source,
receiveTag,
group,
&status );
#else
throw Exceptions::MPISupportMissing();
#endif
}
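// Usage sketch (left/right neighbour ranks and a buffer size N are placeholders):
//
//    MpiCommunicator::SendReceive( sendToRight, N, right, 0,
//                                  recvFromLeft, N, left, 0,
//                                  MpiCommunicator::AllGroup );
//
// i.e. a single call both sends to one neighbour and receives from the other without deadlock.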
template< typename T >
static void Alltoall( const T* sendData,
int sendCount,
T* receiveData,
int receiveCount,
CommunicationGroup group )
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
MPI_Alltoall( const_cast< void* >( ( void* ) sendData ),
sendCount,
MPITypeResolver< T >::getType(),
( void* ) receiveData,
receiveCount,
MPITypeResolver< T >::getType(),
group );
#else
throw Exceptions::MPISupportMissing();
#endif
}
static void writeProlog( Logger& logger )
{
logger.writeParameter( "MPI processes:", GetSize(AllGroup) );
}
static void CreateNewGroup( bool meToo, int myRank, CommunicationGroup &oldGroup, CommunicationGroup &newGroup )
{
#ifdef HAVE_MPI
if(meToo)
{
MPI_Comm_split(oldGroup, 1, myRank, &newGroup);
}
else
{
MPI_Comm_split(oldGroup, MPI_UNDEFINED, GetRank(oldGroup), &newGroup);
}
#else
throw Exceptions::MPISupportMissing();
#endif
}
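// Example (sketch): split AllGroup so that only even ranks enter the new group;
// ranks with meToo == false end up with NullGroup (MPI_COMM_NULL).
//
//    const int rank = MpiCommunicator::GetRank( MpiCommunicator::AllGroup );
//    MpiCommunicator::CommunicationGroup evenGroup;
//    MpiCommunicator::CreateNewGroup( rank % 2 == 0, rank, MpiCommunicator::AllGroup, evenGroup );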
#ifdef HAVE_MPI
static MPI_Request NullRequest;
static MPI_Comm AllGroup;
static MPI_Comm NullGroup;
#else
static constexpr int NullRequest = -1;
static constexpr int AllGroup = 1;
static constexpr int NullGroup = 0;
#endif

private:
static std::streambuf* psbuf;
static std::streambuf* backup;
static std::ofstream filestr;
static bool redirect;

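// selectGPU below maps MPI ranks to GPUs round-robin within each node: ranks are grouped
// by processor name and the i-th rank on a node gets GPU (i % gpuCount). For example, with
// 4 ranks on a node that has 2 GPUs, local ranks 0 and 2 use GPU 0 while ranks 1 and 3 use GPU 1.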
static void selectGPU(void)
{
#if defined(HAVE_CUDA) && defined(HAVE_MPI)
const int count = GetSize(AllGroup);
const int rank = GetRank(AllGroup);
int gpuCount;
cudaGetDeviceCount(&gpuCount);

procName names[count];

int i=0;
int len;
MPI_Get_processor_name(names[rank].name, &len);

for(i=0;i<count;i++)
std::memcpy(names[i].name,names[rank].name,len+1);

// use MPI_IN_PLACE so that the same buffer can legally serve as send and receive buffer
MPI_Alltoall( MPI_IN_PLACE, MPI_MAX_PROCESSOR_NAME, MPI_CHAR,
(void*)names, MPI_MAX_PROCESSOR_NAME, MPI_CHAR,
MPI_COMM_WORLD);

int nodeRank=0;
for(i=0;i<rank;i++)
{
if(std::strcmp(names[rank].name,names[i].name)==0)
nodeRank++;
}

const int gpuNumber = nodeRank % gpuCount;

cudaSetDevice(gpuNumber);
TNL_CHECK_CUDA_DEVICE;

//std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl;
#endif
}
};

#ifdef HAVE_MPI
MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL;
MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD;
MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL;
#endif
std::streambuf* MpiCommunicator::psbuf = nullptr;
std::streambuf* MpiCommunicator::backup = nullptr;
std::ofstream MpiCommunicator::filestr;
bool MpiCommunicator::redirect = true;
} // namespace Communicators
} // namespace TNL
#ifdef HAVE_MPI
#define TNL_MPI_PRINT( message ) \
if( ! TNL::Communicators::MpiCommunicator::IsInitialized() ) \
std::cerr << message << std::endl; \
else \
for( int __tnl_mpi_print_j = 0; \
__tnl_mpi_print_j < TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup ); \
__tnl_mpi_print_j++ ) \
{ \
if( __tnl_mpi_print_j == TNL::Communicators::MpiCommunicator::GetRank( TNL::Communicators::MpiCommunicator::AllGroup ) ) \
{ \
std::cerr << "Node " << __tnl_mpi_print_j << " of " \
<< TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup ) \
<< " : " << message << std::endl << std::flush; \
} \
TNL::Communicators::MpiCommunicator::Barrier( TNL::Communicators::MpiCommunicator::AllGroup ); \
}
#else
#define TNL_MPI_PRINT( message ) \
std::cerr << message << std::endl;
#endif
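// Usage sketch: each rank prints the message once, serialized by rank order, e.g.
//
//    TNL_MPI_PRINT( "local residue = " << residue );
//
// where residue stands for any streamable local variable (placeholder).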
#ifdef HAVE_MPI
#define TNL_MPI_PRINT_COND( condition, message ) \
if( ! TNL::Communicators::MpiCommunicator::IsInitialized() ) \
{ \
if( condition ) std::cerr << message << std::endl; \
} \
else \
{ \
for( int __tnl_mpi_print_j = 0; \
__tnl_mpi_print_j < TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup ); \
__tnl_mpi_print_j++ ) \
{ \
if( __tnl_mpi_print_j == TNL::Communicators::MpiCommunicator::GetRank( TNL::Communicators::MpiCommunicator::AllGroup ) ) \
{ \
if( condition ) \
std::cerr << "Node " << __tnl_mpi_print_j << " of " \
<< TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup ) \
<< " : " << message << std::endl << std::flush; \
} \
TNL::Communicators::MpiCommunicator::Barrier( TNL::Communicators::MpiCommunicator::AllGroup ); \
} \
}
#else
#define TNL_MPI_PRINT_COND( condition, message ) \
if( condition ) std::cerr << message << std::endl;
#endif
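// Usage sketch: like TNL_MPI_PRINT, but only ranks fulfilling the condition print, e.g.
//
//    TNL_MPI_PRINT_COND( error > tolerance, "error = " << error );
//
// where error and tolerance are illustrative local variables.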