Loading src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +2 −2 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> #include <TNL/MPI/ScopedInitializer.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Matrices/DistributedMatrix.h> Loading Loading @@ -309,7 +309,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) Loading src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +2 −2 Original line number Diff line number Diff line Loading @@ -25,7 +25,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> #include <TNL/MPI/ScopedInitializer.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Matrices/DistributedMatrix.h> Loading Loading @@ -592,7 +592,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) Loading src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h +5 −5 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> #include <TNL/MPI/ScopedInitializer.h> #include <TNL/Solvers/ODE/Euler.h> #include <TNL/Solvers/ODE/Merson.h> Loading Loading @@ -225,7 +225,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) Loading src/TNL/Communicators/MpiCommunicator.h +23 −250 Original line number Diff line number Diff line Loading @@ -11,36 +11,23 @@ #pragma once #include <iostream> #include <fstream> #include <cstring> #ifdef HAVE_MPI #include <mpi.h> #ifdef OMPI_MAJOR_VERSION // header specific to OpenMPI (needed for CUDA-aware detection) #include <mpi-ext.h> #endif #include <unistd.h> // getpid #ifdef HAVE_CUDA #include <TNL/Cuda/CheckDevice.h> typedef struct __attribute__((__packed__)) { char name[MPI_MAX_PROCESSOR_NAME]; } procName; #endif #endif #include <TNL/String.h> #include <TNL/Logger.h> #include <TNL/Debugging/OutputRedirection.h> #include <TNL/Communicators/MpiDefs.h> #include <TNL/MPI/Wrappers.h> #include <TNL/MPI/DummyDefs.h> #include <TNL/MPI/Utils.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Exceptions/MPISupportMissing.h> #include <TNL/Exceptions/MPIDimsCreateError.h> #include <TNL/Communicators/MPITypeResolver.h> namespace TNL { Loading Loading @@ -88,7 +75,7 @@ class MpiCommunicator const bool redirect = parameters.getParameter< bool >( "redirect-mpi-output" ); const String outputDirectory = parameters.getParameter< String >( "redirect-mpi-output-dir" ); if( redirect ) setupRedirection( outputDirectory ); MPI::setupRedirection( outputDirectory ); #ifdef HAVE_CUDA int size; MPI_Comm_size( MPI_COMM_WORLD, &size ); Loading Loading @@ -144,125 +131,32 @@ class MpiCommunicator static void Init( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE ) { #ifdef HAVE_MPI switch( required_thread_level ) { case MPI_THREAD_SINGLE: case MPI_THREAD_FUNNELED: case MPI_THREAD_SERIALIZED: case MPI_THREAD_MULTIPLE: break; default: printf("ERROR: invalid argument for the 'required' thread level support: %d\n", required_thread_level); MPI_Abort(MPI_COMM_WORLD, 1); } int provided; MPI_Init_thread( &argc, &argv, required_thread_level, &provided ); if( provided < required_thread_level ) { const char* level = ""; switch( required_thread_level ) { case MPI_THREAD_SINGLE: level = "MPI_THREAD_SINGLE"; break; case MPI_THREAD_FUNNELED: level = "MPI_THREAD_FUNNELED"; break; case MPI_THREAD_SERIALIZED: level = "MPI_THREAD_SERIALIZED"; break; case MPI_THREAD_MULTIPLE: level = "MPI_THREAD_MULTIPLE"; break; } printf("ERROR: The MPI library does not have the required level of thread support: %s\n", level); MPI_Abort(MPI_COMM_WORLD, 1); } selectGPU(); #endif MPI::Init( argc, argv, required_thread_level ); // silence warnings about (potentially) unused variables (void) NullGroup; (void) NullRequest; } static void setupRedirection( std::string outputDirectory ) { #ifdef HAVE_MPI if(isDistributed() ) { if(GetRank(AllGroup)!=0) { const std::string stdoutFile = outputDirectory + "/stdout_" + std::to_string(GetRank(AllGroup)) + ".txt"; const std::string stderrFile = outputDirectory + "/stderr_" + std::to_string(GetRank(AllGroup)) + ".txt"; std::cout << GetRank(AllGroup) << ": Redirecting stdout and stderr to files " << stdoutFile << " and " << stderrFile << std::endl; Debugging::redirect_stdout_stderr( stdoutFile, stderrFile ); } } #endif } static void Finalize() { #ifdef HAVE_MPI if(isDistributed()) { if(GetRank(AllGroup)!=0) { // restore redirection (not necessary, it uses RAII internally...) Debugging::redirect_stdout_stderr( "", "", true ); } } MPI_Finalize(); #endif MPI::Finalize(); } static bool IsInitialized() { #ifdef HAVE_MPI int initialized, finalized; MPI_Initialized(&initialized); MPI_Finalized(&finalized); return initialized && !finalized; #else return true; #endif return MPI::isInitialized(); } static int GetRank(CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup"); int rank; MPI_Comm_rank(group,&rank); return rank; #else return 0; #endif return MPI::GetRank( group ); } static int GetSize(CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup"); int size; MPI_Comm_size(group,&size); return size; #else return 1; #endif return MPI::GetSize( group ); } #ifdef HAVE_MPI template< typename T > static MPI_Datatype getDataType( const T& t ) { return MPITypeResolver< T >::getType(); } #endif //dim-number of dimensions, distr array of guess distr - 0 for computation //distr array will be filled by computed distribution //more information in MPI documentation Loading Loading @@ -291,78 +185,42 @@ class MpiCommunicator static void Barrier( CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup"); MPI_Barrier(group); #endif MPI::Barrier( group ); } template <typename T> static void Send( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "Send cannot be called with NullGroup"); MPI_Send( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group ); #endif MPI::Send( data, count, dest, tag, group ); } template <typename T> static void Recv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "Recv cannot be called with NullGroup"); MPI_Status status; MPI_Recv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &status ); #endif MPI::Recv( data, count, src, tag, group ); } template <typename T> static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup"); Request req; MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req); return req; #else return 1; #endif return MPI::Isend( data, count, dest, tag, group ); } template <typename T> static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup"); Request req; MPI_Irecv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &req); return req; #else return 1; #endif return MPI::Irecv( data, count, src, tag, group ); } static void WaitAll(Request *reqs, int length) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE); #endif MPI::Waitall( reqs, length ); } template< typename T > static void Bcast( T* data, int count, int root, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "BCast cannot be called with NullGroup"); MPI_Bcast((void*) data, count, MPITypeResolver< T >::getType(), root, group); #endif MPI::Bcast( data, count, root, group ); } template< typename T > Loading @@ -372,12 +230,7 @@ class MpiCommunicator const MPI_Op &op, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); MPI_Allreduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,group); #else memcpy( ( void* ) reduced_data, ( const void* ) data, count * sizeof( T ) ); #endif MPI::Allreduce( data, reduced_data, count, op, group ); } // in-place variant of Allreduce Loading @@ -387,27 +240,18 @@ class MpiCommunicator const MPI_Op &op, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group); #endif MPI::Allreduce( data, count, op, group ); } template< typename T > static void Reduce( const T* data, T* reduced_data, int count, MPI_Op &op, const MPI_Op &op, int root, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup"); MPI_Reduce( const_cast< void* >( ( void*) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,root,group); #else memcpy( ( void* ) reduced_data, ( void* ) data, count * sizeof( T ) ); #endif MPI::Reduce( data, reduced_data, count, op, root, group ); } template< typename T > Loading @@ -421,24 +265,7 @@ class MpiCommunicator int receiveTag, CommunicationGroup group ) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup"); MPI_Status status; MPI_Sendrecv( const_cast< void* >( ( void* ) sendData ), sendCount, MPITypeResolver< T >::getType(), destination, sendTag, ( void* ) receiveData, receiveCount, MPITypeResolver< T >::getType(), source, receiveTag, group, &status ); #else throw Exceptions::MPISupportMissing(); #endif MPI::Sendrecv( sendData, sendCount, destination, sendTag, receiveData, receiveCount, source, receiveTag, group ); } template< typename T > Loading @@ -448,19 +275,7 @@ class MpiCommunicator int receiveCount, CommunicationGroup group ) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup"); MPI_Alltoall( const_cast< void* >( ( void* ) sendData ), sendCount, MPITypeResolver< T >::getType(), ( void* ) receiveData, receiveCount, MPITypeResolver< T >::getType(), group ); #else TNL_ASSERT_EQ( sendCount, receiveCount, "sendCount must be equal to receiveCount when running without MPI." ); memcpy( (void*) receiveData, (const void*) sendData, sendCount * sizeof( T ) ); #endif MPI::Alltoall( sendData, sendCount, receiveData, receiveCount, group ); } Loading @@ -485,58 +300,16 @@ class MpiCommunicator } #ifdef HAVE_MPI static MPI_Request NullRequest; static MPI_Comm AllGroup; static MPI_Comm NullGroup; #else static constexpr int NullRequest = -1; static constexpr int AllGroup = 1; static constexpr int NullGroup = 0; #endif private: static void selectGPU(void) { #ifdef HAVE_MPI #ifdef HAVE_CUDA const int count = GetSize(AllGroup); const int rank = GetRank(AllGroup); int gpuCount; cudaGetDeviceCount(&gpuCount); procName names[count]; int i=0; int len; MPI_Get_processor_name(names[rank].name, &len); for(i=0;i<count;i++) std::memcpy(names[i].name,names[rank].name,len+1); MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, (void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, MPI_COMM_WORLD); int nodeRank=0; for(i=0;i<rank;i++) { if(std::strcmp(names[rank].name,names[i].name)==0) nodeRank++; } const int gpuNumber = nodeRank % gpuCount; cudaSetDevice(gpuNumber); TNL_CHECK_CUDA_DEVICE; //std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl; #endif #endif } }; #ifdef HAVE_MPI MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL; MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD; MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL; #endif Loading src/TNL/Containers/DistributedArray.hpp +0 −1 Original line number Diff line number Diff line Loading @@ -15,7 +15,6 @@ #include "DistributedArray.h" #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Communicators/MpiDefs.h> // important only when MPI is disabled namespace TNL { namespace Containers { Loading Loading
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +2 −2 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> #include <TNL/MPI/ScopedInitializer.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Matrices/DistributedMatrix.h> Loading Loading @@ -309,7 +309,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) Loading
src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +2 −2 Original line number Diff line number Diff line Loading @@ -25,7 +25,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> #include <TNL/MPI/ScopedInitializer.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Matrices/DistributedMatrix.h> Loading Loading @@ -592,7 +592,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) Loading
src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h +5 −5 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> #include <TNL/MPI/ScopedInitializer.h> #include <TNL/Solvers/ODE/Euler.h> #include <TNL/Solvers/ODE/Merson.h> Loading Loading @@ -225,7 +225,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) Loading
src/TNL/Communicators/MpiCommunicator.h +23 −250 Original line number Diff line number Diff line Loading @@ -11,36 +11,23 @@ #pragma once #include <iostream> #include <fstream> #include <cstring> #ifdef HAVE_MPI #include <mpi.h> #ifdef OMPI_MAJOR_VERSION // header specific to OpenMPI (needed for CUDA-aware detection) #include <mpi-ext.h> #endif #include <unistd.h> // getpid #ifdef HAVE_CUDA #include <TNL/Cuda/CheckDevice.h> typedef struct __attribute__((__packed__)) { char name[MPI_MAX_PROCESSOR_NAME]; } procName; #endif #endif #include <TNL/String.h> #include <TNL/Logger.h> #include <TNL/Debugging/OutputRedirection.h> #include <TNL/Communicators/MpiDefs.h> #include <TNL/MPI/Wrappers.h> #include <TNL/MPI/DummyDefs.h> #include <TNL/MPI/Utils.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Exceptions/MPISupportMissing.h> #include <TNL/Exceptions/MPIDimsCreateError.h> #include <TNL/Communicators/MPITypeResolver.h> namespace TNL { Loading Loading @@ -88,7 +75,7 @@ class MpiCommunicator const bool redirect = parameters.getParameter< bool >( "redirect-mpi-output" ); const String outputDirectory = parameters.getParameter< String >( "redirect-mpi-output-dir" ); if( redirect ) setupRedirection( outputDirectory ); MPI::setupRedirection( outputDirectory ); #ifdef HAVE_CUDA int size; MPI_Comm_size( MPI_COMM_WORLD, &size ); Loading Loading @@ -144,125 +131,32 @@ class MpiCommunicator static void Init( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE ) { #ifdef HAVE_MPI switch( required_thread_level ) { case MPI_THREAD_SINGLE: case MPI_THREAD_FUNNELED: case MPI_THREAD_SERIALIZED: case MPI_THREAD_MULTIPLE: break; default: printf("ERROR: invalid argument for the 'required' thread level support: %d\n", required_thread_level); MPI_Abort(MPI_COMM_WORLD, 1); } int provided; MPI_Init_thread( &argc, &argv, required_thread_level, &provided ); if( provided < required_thread_level ) { const char* level = ""; switch( required_thread_level ) { case MPI_THREAD_SINGLE: level = "MPI_THREAD_SINGLE"; break; case MPI_THREAD_FUNNELED: level = "MPI_THREAD_FUNNELED"; break; case MPI_THREAD_SERIALIZED: level = "MPI_THREAD_SERIALIZED"; break; case MPI_THREAD_MULTIPLE: level = "MPI_THREAD_MULTIPLE"; break; } printf("ERROR: The MPI library does not have the required level of thread support: %s\n", level); MPI_Abort(MPI_COMM_WORLD, 1); } selectGPU(); #endif MPI::Init( argc, argv, required_thread_level ); // silence warnings about (potentially) unused variables (void) NullGroup; (void) NullRequest; } static void setupRedirection( std::string outputDirectory ) { #ifdef HAVE_MPI if(isDistributed() ) { if(GetRank(AllGroup)!=0) { const std::string stdoutFile = outputDirectory + "/stdout_" + std::to_string(GetRank(AllGroup)) + ".txt"; const std::string stderrFile = outputDirectory + "/stderr_" + std::to_string(GetRank(AllGroup)) + ".txt"; std::cout << GetRank(AllGroup) << ": Redirecting stdout and stderr to files " << stdoutFile << " and " << stderrFile << std::endl; Debugging::redirect_stdout_stderr( stdoutFile, stderrFile ); } } #endif } static void Finalize() { #ifdef HAVE_MPI if(isDistributed()) { if(GetRank(AllGroup)!=0) { // restore redirection (not necessary, it uses RAII internally...) Debugging::redirect_stdout_stderr( "", "", true ); } } MPI_Finalize(); #endif MPI::Finalize(); } static bool IsInitialized() { #ifdef HAVE_MPI int initialized, finalized; MPI_Initialized(&initialized); MPI_Finalized(&finalized); return initialized && !finalized; #else return true; #endif return MPI::isInitialized(); } static int GetRank(CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup"); int rank; MPI_Comm_rank(group,&rank); return rank; #else return 0; #endif return MPI::GetRank( group ); } static int GetSize(CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup"); int size; MPI_Comm_size(group,&size); return size; #else return 1; #endif return MPI::GetSize( group ); } #ifdef HAVE_MPI template< typename T > static MPI_Datatype getDataType( const T& t ) { return MPITypeResolver< T >::getType(); } #endif //dim-number of dimensions, distr array of guess distr - 0 for computation //distr array will be filled by computed distribution //more information in MPI documentation Loading Loading @@ -291,78 +185,42 @@ class MpiCommunicator static void Barrier( CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup"); MPI_Barrier(group); #endif MPI::Barrier( group ); } template <typename T> static void Send( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "Send cannot be called with NullGroup"); MPI_Send( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group ); #endif MPI::Send( data, count, dest, tag, group ); } template <typename T> static void Recv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "Recv cannot be called with NullGroup"); MPI_Status status; MPI_Recv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &status ); #endif MPI::Recv( data, count, src, tag, group ); } template <typename T> static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup"); Request req; MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req); return req; #else return 1; #endif return MPI::Isend( data, count, dest, tag, group ); } template <typename T> static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup ) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup"); Request req; MPI_Irecv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &req); return req; #else return 1; #endif return MPI::Irecv( data, count, src, tag, group ); } static void WaitAll(Request *reqs, int length) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE); #endif MPI::Waitall( reqs, length ); } template< typename T > static void Bcast( T* data, int count, int root, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "BCast cannot be called with NullGroup"); MPI_Bcast((void*) data, count, MPITypeResolver< T >::getType(), root, group); #endif MPI::Bcast( data, count, root, group ); } template< typename T > Loading @@ -372,12 +230,7 @@ class MpiCommunicator const MPI_Op &op, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); MPI_Allreduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,group); #else memcpy( ( void* ) reduced_data, ( const void* ) data, count * sizeof( T ) ); #endif MPI::Allreduce( data, reduced_data, count, op, group ); } // in-place variant of Allreduce Loading @@ -387,27 +240,18 @@ class MpiCommunicator const MPI_Op &op, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group); #endif MPI::Allreduce( data, count, op, group ); } template< typename T > static void Reduce( const T* data, T* reduced_data, int count, MPI_Op &op, const MPI_Op &op, int root, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup"); MPI_Reduce( const_cast< void* >( ( void*) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,root,group); #else memcpy( ( void* ) reduced_data, ( void* ) data, count * sizeof( T ) ); #endif MPI::Reduce( data, reduced_data, count, op, root, group ); } template< typename T > Loading @@ -421,24 +265,7 @@ class MpiCommunicator int receiveTag, CommunicationGroup group ) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup"); MPI_Status status; MPI_Sendrecv( const_cast< void* >( ( void* ) sendData ), sendCount, MPITypeResolver< T >::getType(), destination, sendTag, ( void* ) receiveData, receiveCount, MPITypeResolver< T >::getType(), source, receiveTag, group, &status ); #else throw Exceptions::MPISupportMissing(); #endif MPI::Sendrecv( sendData, sendCount, destination, sendTag, receiveData, receiveCount, source, receiveTag, group ); } template< typename T > Loading @@ -448,19 +275,7 @@ class MpiCommunicator int receiveCount, CommunicationGroup group ) { #ifdef HAVE_MPI TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup"); MPI_Alltoall( const_cast< void* >( ( void* ) sendData ), sendCount, MPITypeResolver< T >::getType(), ( void* ) receiveData, receiveCount, MPITypeResolver< T >::getType(), group ); #else TNL_ASSERT_EQ( sendCount, receiveCount, "sendCount must be equal to receiveCount when running without MPI." ); memcpy( (void*) receiveData, (const void*) sendData, sendCount * sizeof( T ) ); #endif MPI::Alltoall( sendData, sendCount, receiveData, receiveCount, group ); } Loading @@ -485,58 +300,16 @@ class MpiCommunicator } #ifdef HAVE_MPI static MPI_Request NullRequest; static MPI_Comm AllGroup; static MPI_Comm NullGroup; #else static constexpr int NullRequest = -1; static constexpr int AllGroup = 1; static constexpr int NullGroup = 0; #endif private: static void selectGPU(void) { #ifdef HAVE_MPI #ifdef HAVE_CUDA const int count = GetSize(AllGroup); const int rank = GetRank(AllGroup); int gpuCount; cudaGetDeviceCount(&gpuCount); procName names[count]; int i=0; int len; MPI_Get_processor_name(names[rank].name, &len); for(i=0;i<count;i++) std::memcpy(names[i].name,names[rank].name,len+1); MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, (void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, MPI_COMM_WORLD); int nodeRank=0; for(i=0;i<rank;i++) { if(std::strcmp(names[rank].name,names[i].name)==0) nodeRank++; } const int gpuNumber = nodeRank % gpuCount; cudaSetDevice(gpuNumber); TNL_CHECK_CUDA_DEVICE; //std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl; #endif #endif } }; #ifdef HAVE_MPI MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL; MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD; MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL; #endif Loading
src/TNL/Containers/DistributedArray.hpp +0 −1 Original line number Diff line number Diff line Loading @@ -15,7 +15,6 @@ #include "DistributedArray.h" #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Communicators/MpiDefs.h> // important only when MPI is disabled namespace TNL { namespace Containers { Loading