/***************************************************************************
MpiCommunicator.h - description
-------------------
begin : 2005/04/23
copyright : (C) 2005 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#pragma once
#include <iostream>
#include <fstream>

#include <cstring>
#ifdef HAVE_MPI
#include <mpi.h>
#ifdef OMPI_MAJOR_VERSION
// header specific to OpenMPI (needed for CUDA-aware detection)
#include <mpi-ext.h>
#endif
#include <unistd.h> // getpid

#ifdef HAVE_CUDA
#include <TNL/Devices/Cuda.h>

typedef struct __attribute__((__packed__)) {
char name[MPI_MAX_PROCESSOR_NAME];
} procName;
#endif
#endif // HAVE_MPI

#include <TNL/Communicators/MpiDefs.h>
#include <TNL/Exceptions/MPISupportMissing.h>
#include <TNL/Exceptions/MPIDimsCreateError.h>
#include <TNL/Communicators/MPITypeResolver.h>

namespace TNL {
namespace Communicators {
class MpiCommunicator
{
public: // TODO: this was private
/*inline static MPI_Datatype MPIDataType( const signed char* ) { return MPI_CHAR; };
inline static MPI_Datatype MPIDataType( const signed short int* ) { return MPI_SHORT; };
inline static MPI_Datatype MPIDataType( const signed int* ) { return MPI_INT; };
inline static MPI_Datatype MPIDataType( const signed long int* ) { return MPI_LONG; };
inline static MPI_Datatype MPIDataType( const unsigned char *) { return MPI_UNSIGNED_CHAR; };
inline static MPI_Datatype MPIDataType( const unsigned short int* ) { return MPI_UNSIGNED_SHORT; };
inline static MPI_Datatype MPIDataType( const unsigned int* ) { return MPI_UNSIGNED; };
inline static MPI_Datatype MPIDataType( const unsigned long int* ) { return MPI_UNSIGNED_LONG; };
inline static MPI_Datatype MPIDataType( const float* ) { return MPI_FLOAT; };
inline static MPI_Datatype MPIDataType( const double* ) { return MPI_DOUBLE; };
inline static MPI_Datatype MPIDataType( const long double* ) { return MPI_LONG_DOUBLE; };
// TODO: tested with MPI_LOR and MPI_LAND, but there should probably be unit tests for all operations
inline static MPI_Datatype MPIDataType( const bool* )
{
// sizeof(bool) is implementation-defined: https://stackoverflow.com/a/4897859
static_assert( sizeof(bool) == 1, "The programmer did not count with systems where sizeof(bool) != 1." );
return MPI_CHAR;
};*/

#ifdef HAVE_MPI
using Request = MPI_Request;
using CommunicationGroup = MPI_Comm;
#else
using Request = int;
using CommunicationGroup = int;
#endif

static bool isDistributed()
{
#ifdef HAVE_MPI
return GetSize(AllGroup)>1;
#else
return false;
#endif
}

static void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
{
#ifdef HAVE_MPI
config.addEntry< bool >( "redirect-mpi-output", "Only process with rank 0 prints to console. Other processes are redirected to files.", true );
config.addEntry< bool >( "mpi-gdb-debug", "Wait for GDB to attach the master MPI process.", false );
config.addEntry< int >( "mpi-process-to-attach", "Number of the MPI process to be attached by GDB. Set -1 for all processes.", 0 );
#endif
}

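// The entries registered above become configuration parameters of the application.
// Illustrative invocation only; the exact option spelling is assumed to follow the
// entry names registered in configSetup and "my-tnl-app" is a hypothetical binary:
//
//    mpirun -np 4 ./my-tnl-app --redirect-mpi-output true --mpi-gdb-debug true --mpi-process-to-attach 0
//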
static bool setup( const Config::ParameterContainer& parameters,
const String& prefix = "" )
{
#ifdef HAVE_MPI

if(IsInitialized())//i.e. - isUsed
{
redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
#ifdef HAVE_CUDA
int size;
MPI_Comm_size( MPI_COMM_WORLD, &size );
if( size > 1 )
{
#if defined( MPIX_CUDA_AWARE_SUPPORT ) && MPIX_CUDA_AWARE_SUPPORT
std::cout << "CUDA-aware MPI detected on this system ... " << std::endl;
#elif defined( MPIX_CUDA_AWARE_SUPPORT ) && !MPIX_CUDA_AWARE_SUPPORT
std::cerr << "MPI is not CUDA-aware. Please install correct version of MPI." << std::endl;
return false;
#else
std::cerr << "WARNING: TNL cannot detect if you have CUDA-aware MPI. Some problems may occur." << std::endl;
#endif
}
#endif // HAVE_CUDA
bool gdbDebug = parameters.getParameter< bool >( "mpi-gdb-debug" );
int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );
if( gdbDebug )
{
int rank = GetRank( MPI_COMM_WORLD );
int pid = getpid();
volatile int tnlMPIDebugAttached = 0;
MPI_Send( &pid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD );
MPI_Barrier( MPI_COMM_WORLD );
if( rank == 0 )
{
std::cout << "Attach GDB to MPI process(es) by entering:" << std::endl;
for( int i = 0; i < GetSize( MPI_COMM_WORLD ); i++ )
{
MPI_Status status;
int recvPid;
MPI_Recv( &recvPid, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status );
if( i == processToAttach || processToAttach == -1 )
{
std::cout << " For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\""
<< " -ex \"set variable tnlMPIDebugAttached=1\""
<< " -ex \"finish\"" << std::endl;
}
}
std::cout << std::flush;
}
if( rank == processToAttach || processToAttach == -1 )
while( ! tnlMPIDebugAttached );
MPI_Barrier( MPI_COMM_WORLD );
}
}
#endif // HAVE_MPI
return true;
}

static void Init(int& argc, char**& argv )
{
#ifdef HAVE_MPI
MPI_Init( &argc, &argv );

selectGPU();
#endif
}

static void setRedirection( bool redirect_ )
{
redirect = redirect_;
}
static void setupRedirection()
{
#ifdef HAVE_MPI
if( isDistributed() && redirect )
{
// redirect all stdout to files, only rank 0 keeps writing to the console
backup=std::cout.rdbuf();

// redirect output to files...
if(GetRank(AllGroup)!=0)
{
std::cout << GetRank(AllGroup) << ": Redirecting std::cout to file" << std::endl;
const String stdoutFile = String("./stdout-") + convertToString(GetRank(AllGroup)) + String(".txt");
filestr.open(stdoutFile.getString());
psbuf = filestr.rdbuf();
std::cout.rdbuf(psbuf);
}
}
#else
throw Exceptions::MPISupportMissing();
#endif
}

static void Finalize()
{
#ifdef HAVE_MPI
if(GetRank(AllGroup)!=0)
{
std::cout.rdbuf(backup);
filestr.close();
}
MPI_Finalize();
#endif
}

static bool IsInitialized()
{
#ifdef HAVE_MPI
int initialized, finalized;
MPI_Initialized(&initialized);
MPI_Finalized(&finalized);
return initialized && !finalized;
#else
throw Exceptions::MPISupportMissing();
#endif
}
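
// Illustrative sketch of the intended call sequence in an application's main();
// the surrounding Config::ConfigDescription / ParameterContainer handling is omitted:
//
//    int main( int argc, char* argv[] )
//    {
//       MpiCommunicator::Init( argc, argv );
//       // ... parse parameters, call MpiCommunicator::setup( parameters ), run the solver ...
//       MpiCommunicator::Finalize();
//       return 0;
//    }
//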
static int GetRank(CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup");
int rank;
MPI_Comm_rank(group,&rank);
return rank;
#else
throw Exceptions::MPISupportMissing();
#endif
}

static int GetSize(CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup");
int size;
MPI_Comm_size(group,&size);
return size;
#else
throw Exceptions::MPISupportMissing();
#endif
}

#ifdef HAVE_MPI
template< typename T >
static MPI_Datatype getDataType( const T& t )
{
return MPITypeResolver< T >::getType();
}
#endif

// nproc - number of processes, dim - number of dimensions,
// distr - array with the initial guess of the distribution; 0 means "compute this dimension".
// On return, distr is filled with the computed distribution.
// See the MPI documentation of MPI_Dims_create for more information.
static void DimsCreate(int nproc, int dim, int *distr)
{
#ifdef HAVE_MPI
int sum = 0, prod = 1;
for( int i = 0;i < dim; i++ )
{
sum += distr[ i ];
prod *= distr[ i ];
}
if( prod != 0 && prod != GetSize( AllGroup ) )
throw Exceptions::MPIDimsCreateError();
if( sum == 0 )
{
for(int i=0;i<dim-1;i++)
{
distr[i]=1;
}
distr[dim-1]=0;
}
MPI_Dims_create(nproc, dim, distr);
#else
throw Exceptions::MPISupportMissing();
#endif
}

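// Illustrative example: let MPI choose a 2D decomposition of all processes. With 6
// processes the result might be { 3, 2 }; the exact values are up to MPI_Dims_create.
//
//    int distr[ 2 ] = { 0, 0 };
//    MpiCommunicator::DimsCreate( MpiCommunicator::GetSize( MpiCommunicator::AllGroup ), 2, distr );
//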
static void Barrier( CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup");
MPI_Barrier(group);
#else
throw Exceptions::MPISupportMissing();
#endif
}

template <typename T>
static void Send( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "Send cannot be called with NullGroup");
MPI_Send( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group );
#else
throw Exceptions::MPISupportMissing();
#endif
}
template <typename T>
static void Recv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "Recv cannot be called with NullGroup");
MPI_Status status;
MPI_Recv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &status );
#else
throw Exceptions::MPISupportMissing();
#endif
}
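// Illustrative blocking point-to-point exchange between ranks 0 and 1 (assumes at
// least two processes in AllGroup and a matching tag on both sides):
//
//    double value = 3.14;
//    if( MpiCommunicator::GetRank() == 0 )
//       MpiCommunicator::Send( &value, 1, 1, 0 );
//    else if( MpiCommunicator::GetRank() == 1 )
//       MpiCommunicator::Recv( &value, 1, 0, 0 );
//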
template <typename T>
static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup");
Request req;
MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req);
return req;
#else
throw Exceptions::MPISupportMissing();
#endif
}
template <typename T>
static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup )
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup");
Request req;
MPI_Irecv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &req);
return req;
#else
throw Exceptions::MPISupportMissing();
#endif

}
static void WaitAll(Request *reqs, int length)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE);
#else
throw Exceptions::MPISupportMissing();
#endif
}

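// Illustrative non-blocking exchange: post the receive and the send, then wait for both.
// Assumes a valid neighbouring rank `neighbor` exists in AllGroup.
//
//    double sendBuf = 1.0, recvBuf = 0.0;
//    MpiCommunicator::Request requests[ 2 ];
//    requests[ 0 ] = MpiCommunicator::IRecv( &recvBuf, 1, neighbor, 0 );
//    requests[ 1 ] = MpiCommunicator::ISend( &sendBuf, 1, neighbor, 0 );
//    MpiCommunicator::WaitAll( requests, 2 );
//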
template< typename T >
static void Bcast( T* data, int count, int root, CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
TNL_ASSERT_NE(group, NullGroup, "Bcast cannot be called with NullGroup");
MPI_Bcast((void*) data, count, MPITypeResolver< T >::getType(), root, group);
#else
throw Exceptions::MPISupportMissing();
#endif

}
template< typename T >
static void Allreduce( const T* data,
T* reduced_data,
int count,
const MPI_Op &op,
CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
MPI_Allreduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,group);
#else
throw Exceptions::MPISupportMissing();
#endif
}

// in-place variant of Allreduce
template< typename T >
static void Allreduce( T* data,
int count,
const MPI_Op &op,
CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group);
#else
throw Exceptions::MPISupportMissing();
#endif
}
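// Illustrative reduction: sum a local value over all processes, in place, so that every
// rank ends up with the global sum. computeLocalSum() is a hypothetical helper.
//
//    double localSum = computeLocalSum();
//    MpiCommunicator::Allreduce( &localSum, 1, MPI_SUM, MpiCommunicator::AllGroup );
//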
template< typename T >
static void Reduce( const T* data,
T* reduced_data,
int count,
MPI_Op &op,
int root,
CommunicationGroup group)
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup");
MPI_Reduce( const_cast< void* >( ( void*) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,root,group);
#else
throw Exceptions::MPISupportMissing();
#endif
}

template< typename T >
static void SendReceive( const T* sendData,
int sendCount,
int destination,
int sendTag,
T* receiveData,
int receiveCount,
int source,
int receiveTag,
CommunicationGroup group )
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
MPI_Status status;
MPI_Sendrecv( const_cast< void* >( ( void* ) sendData ),
sendCount,
MPITypeResolver< T >::getType(),
destination,
sendTag,
( void* ) receiveData,
receiveCount,
MPITypeResolver< T >::getType(),
source,
receiveTag,
group,
&status );
#else
throw Exceptions::MPISupportMissing();
#endif
}

template< typename T >
static void Alltoall( const T* sendData,
int sendCount,
T* receiveData,
int receiveCount,
CommunicationGroup group )
{
#ifdef HAVE_MPI
TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
MPI_Alltoall( const_cast< void* >( ( void* ) sendData ),
sendCount,
MPITypeResolver< T >::getType(),
( void* ) receiveData,
receiveCount,
MPITypeResolver< T >::getType(),
group );
#else
throw Exceptions::MPISupportMissing();
#endif
}

static void writeProlog( Logger& logger )
{
logger.writeParameter( "MPI processes:", GetSize(AllGroup) );
}

static void CreateNewGroup( bool meToo, int myRank, CommunicationGroup &oldGroup, CommunicationGroup &newGroup )
{
#ifdef HAVE_MPI
if(meToo)
{
MPI_Comm_split(oldGroup, 1, myRank, &newGroup);
}
else
{
MPI_Comm_split(oldGroup, MPI_UNDEFINED, GetRank(oldGroup), &newGroup);
}
#else
throw Exceptions::MPISupportMissing();
#endif
}

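// Illustrative example: split AllGroup so that only even ranks enter the new group;
// the remaining ranks receive NullGroup.
//
//    MpiCommunicator::CommunicationGroup evenGroup;
//    const int rank = MpiCommunicator::GetRank( MpiCommunicator::AllGroup );
//    MpiCommunicator::CreateNewGroup( rank % 2 == 0, rank, MpiCommunicator::AllGroup, evenGroup );
//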
#ifdef HAVE_MPI
static MPI_Request NullRequest;
static MPI_Comm AllGroup;
static MPI_Comm NullGroup;
#else
static constexpr int NullRequest = -1;
static constexpr int AllGroup = 1;
static constexpr int NullGroup = 0;
#endif

private:
static std::streambuf* psbuf;
static std::streambuf* backup;
static std::ofstream filestr;
static bool redirect;

static void selectGPU(void)
{
#if defined( HAVE_CUDA ) && defined( HAVE_MPI )
const int count = GetSize(AllGroup);
const int rank = GetRank(AllGroup);
int gpuCount;
cudaGetDeviceCount(&gpuCount);

procName names[count];

int i=0;
int len;
MPI_Get_processor_name(names[rank].name, &len);

for(i=0;i<count;i++)
std::memcpy(names[i].name,names[rank].name,len+1);

MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,
(void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,
MPI_COMM_WORLD);

int nodeRank=0;
for(i=0;i<rank;i++)
{
if(std::strcmp(names[rank].name,names[i].name)==0)
nodeRank++;
}
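// nodeRank now counts how many lower-ranked processes report the same processor name,
// i.e. it is the local rank of this process on its node; GPUs are then assigned
// round-robin within the node.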

const int gpuNumber = nodeRank % gpuCount;

cudaSetDevice(gpuNumber);
TNL_CHECK_CUDA_DEVICE;

//std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl;

#endif
}
}; // class MpiCommunicator

#ifdef HAVE_MPI
MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL;
MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD;
MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL;
#endif
std::streambuf* MpiCommunicator::psbuf = nullptr;
std::streambuf* MpiCommunicator::backup = nullptr;
std::ofstream MpiCommunicator::filestr;
bool MpiCommunicator::redirect = true;

} // namespace Communicators
} // namespace TNL