diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h index 74a3205d35f566e84e5871c0380c57f564b5a66b..abe08210d0f7e261dffeb6abf18add392fe508bb 100644 --- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h @@ -20,7 +20,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Matrices/DistributedMatrix.h> @@ -309,7 +309,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); - Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 06ba2bc946853450285d399cf98db50885d09f43..75b1e0e25829d79f787a9580c63462bbccf130df 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -25,7 +25,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Matrices/DistributedMatrix.h> @@ -592,7 +592,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); - Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h index aa4370c7ac358514584c5311c94ff7c3117c6f05..fcaaaedf21911a3080c1cecfbbe5f3fae2cce739 100644 --- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h +++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h @@ -24,7 +24,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> #include <TNL/Solvers/ODE/Euler.h> #include <TNL/Solvers/ODE/Merson.h> @@ -63,7 +63,7 @@ benchmarkODESolvers( Benchmark& benchmark, #ifdef HAVE_CUDA CudaVectorPointer cuda_u( dofs ); *cuda_u = 0.0; -#endif +#endif if( solver == "euler" || solver == "all" ) { using HostSolver = Solvers::ODE::Euler< HostProblem, SolverMonitorType >; benchmark.setOperation("Euler"); @@ -168,10 +168,10 @@ bool resolveRealTypes( Benchmark& benchmark, Config::ParameterContainer& parameters ) { const String& realType = parameters.getParameter< String >( "real-type" ); - if( ( realType == "float" || realType == "all" ) && + if( ( realType == "float" || realType == "all" ) && ! resolveIndexType< float >( benchmark, metadata, parameters ) ) return false; - if( ( realType == "double" || realType == "all" ) && + if( ( realType == "double" || realType == "all" ) && ! 
resolveIndexType< double >( benchmark, metadata, parameters ) ) return false; return true; @@ -225,7 +225,7 @@ main( int argc, char* argv[] ) configSetup( conf_desc ); - Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) diff --git a/src/TNL/Communicators/MPITypeResolver.h b/src/TNL/Communicators/MPITypeResolver.h deleted file mode 100644 index 5429d5e33c970576fac1856f3624eeef7a06a458..0000000000000000000000000000000000000000 --- a/src/TNL/Communicators/MPITypeResolver.h +++ /dev/null @@ -1,108 +0,0 @@ -/*************************************************************************** - MPITypeResolver.h - description - ------------------- - begin : Feb 4, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Communicators { - -#ifdef HAVE_MPI -template<typename Type> -struct MPITypeResolver -{ - static inline MPI_Datatype getType() - { - static_assert( sizeof(Type) == sizeof(char) || - sizeof(Type) == sizeof(int) || - sizeof(Type) == sizeof(short int) || - sizeof(Type) == sizeof(long int), - "Fatal Error - Unknown MPI Type"); - switch( sizeof( Type ) ) - { - case sizeof( char ): - return MPI_CHAR; - case sizeof( int ): - return MPI_INT; - case sizeof( short int ): - return MPI_SHORT; - case sizeof( long int ): - return MPI_LONG; - } - // this will never happen thanks to the static_assert above, but icpc is not that smart - // and complains about missing return statement at the end of non-void function - throw 0; - } -}; - -template<> struct MPITypeResolver< char > -{ - static inline MPI_Datatype getType(){return MPI_CHAR;}; -}; - -template<> struct MPITypeResolver< int > -{ - static inline MPI_Datatype getType(){return MPI_INT;}; -}; - -template<> struct MPITypeResolver< short int > -{ - static inline MPI_Datatype getType(){return MPI_SHORT;}; -}; - -template<> struct MPITypeResolver< long int > -{ - static inline MPI_Datatype getType(){return MPI_LONG;}; -}; - -template<> struct MPITypeResolver< unsigned char > -{ - static inline MPI_Datatype getType(){return MPI_UNSIGNED_CHAR;}; -}; - -template<> struct MPITypeResolver< unsigned short int > -{ - static inline MPI_Datatype getType(){return MPI_UNSIGNED_SHORT;}; -}; - -template<> struct MPITypeResolver< unsigned int > -{ - static inline MPI_Datatype getType(){return MPI_UNSIGNED;}; -}; - -template<> struct MPITypeResolver< unsigned long int > -{ - static inline MPI_Datatype getType(){return MPI_UNSIGNED_LONG;}; -}; - -template<> struct MPITypeResolver< float > -{ - static inline MPI_Datatype getType(){return MPI_FLOAT;}; -}; - -template<> struct MPITypeResolver< double > -{ - static inline MPI_Datatype getType(){return MPI_DOUBLE;}; -}; - -template<> struct MPITypeResolver< long double > -{ - static inline MPI_Datatype getType(){return MPI_LONG_DOUBLE;}; -}; - -template<> struct MPITypeResolver< bool > -{ - // sizeof(bool) is implementation-defined: https://stackoverflow.com/a/4897859 - static_assert( sizeof(bool) == 1, "The systems where sizeof(bool) != 1 are not supported by MPI." 
); - static inline MPI_Datatype getType() { return MPI_C_BOOL; }; -}; -#endif - -} // namespace Communicators -} // namespace TNL diff --git a/src/TNL/Communicators/MpiCommunicator.h b/src/TNL/Communicators/MpiCommunicator.h index dedc35f03af988a4bbc2997553682a8194a19e9e..1995978c5e22dc021cbc04cf99c08a68bd80c4ec 100644 --- a/src/TNL/Communicators/MpiCommunicator.h +++ b/src/TNL/Communicators/MpiCommunicator.h @@ -11,36 +11,23 @@ #pragma once #include <iostream> -#include <fstream> -#include <cstring> #ifdef HAVE_MPI -#include <mpi.h> #ifdef OMPI_MAJOR_VERSION // header specific to OpenMPI (needed for CUDA-aware detection) #include <mpi-ext.h> #endif #include <unistd.h> // getpid - -#ifdef HAVE_CUDA - #include <TNL/Cuda/CheckDevice.h> - - typedef struct __attribute__((__packed__)) { - char name[MPI_MAX_PROCESSOR_NAME]; - } procName; -#endif - #endif #include <TNL/String.h> #include <TNL/Logger.h> -#include <TNL/Debugging/OutputRedirection.h> -#include <TNL/Communicators/MpiDefs.h> +#include <TNL/MPI/Wrappers.h> +#include <TNL/MPI/DummyDefs.h> +#include <TNL/MPI/Utils.h> #include <TNL/Config/ConfigDescription.h> -#include <TNL/Exceptions/MPISupportMissing.h> #include <TNL/Exceptions/MPIDimsCreateError.h> -#include <TNL/Communicators/MPITypeResolver.h> namespace TNL { @@ -88,7 +75,7 @@ class MpiCommunicator const bool redirect = parameters.getParameter< bool >( "redirect-mpi-output" ); const String outputDirectory = parameters.getParameter< String >( "redirect-mpi-output-dir" ); if( redirect ) - setupRedirection( outputDirectory ); + MPI::setupRedirection( outputDirectory ); #ifdef HAVE_CUDA int size; MPI_Comm_size( MPI_COMM_WORLD, &size ); @@ -144,125 +131,32 @@ class MpiCommunicator static void Init( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE ) { -#ifdef HAVE_MPI - switch( required_thread_level ) { - case MPI_THREAD_SINGLE: - case MPI_THREAD_FUNNELED: - case MPI_THREAD_SERIALIZED: - case MPI_THREAD_MULTIPLE: - break; - default: - printf("ERROR: invalid argument for the 'required' thread level support: %d\n", required_thread_level); - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int provided; - MPI_Init_thread( &argc, &argv, required_thread_level, &provided ); - if( provided < required_thread_level ) { - const char* level = ""; - switch( required_thread_level ) { - case MPI_THREAD_SINGLE: - level = "MPI_THREAD_SINGLE"; - break; - case MPI_THREAD_FUNNELED: - level = "MPI_THREAD_FUNNELED"; - break; - case MPI_THREAD_SERIALIZED: - level = "MPI_THREAD_SERIALIZED"; - break; - case MPI_THREAD_MULTIPLE: - level = "MPI_THREAD_MULTIPLE"; - break; - } - printf("ERROR: The MPI library does not have the required level of thread support: %s\n", level); - MPI_Abort(MPI_COMM_WORLD, 1); - } - - selectGPU(); -#endif + MPI::Init( argc, argv, required_thread_level ); // silence warnings about (potentially) unused variables (void) NullGroup; - (void) NullRequest; - } - - static void setupRedirection( std::string outputDirectory ) - { -#ifdef HAVE_MPI - if(isDistributed() ) - { - if(GetRank(AllGroup)!=0) - { - const std::string stdoutFile = outputDirectory + "/stdout_" + std::to_string(GetRank(AllGroup)) + ".txt"; - const std::string stderrFile = outputDirectory + "/stderr_" + std::to_string(GetRank(AllGroup)) + ".txt"; - std::cout << GetRank(AllGroup) << ": Redirecting stdout and stderr to files " << stdoutFile << " and " << stderrFile << std::endl; - Debugging::redirect_stdout_stderr( stdoutFile, stderrFile ); - } - } -#endif } static void Finalize() { -#ifdef HAVE_MPI - 
if(isDistributed()) - { - if(GetRank(AllGroup)!=0) - { - // restore redirection (not necessary, it uses RAII internally...) - Debugging::redirect_stdout_stderr( "", "", true ); - } - } - MPI_Finalize(); -#endif + MPI::Finalize(); } static bool IsInitialized() { -#ifdef HAVE_MPI - int initialized, finalized; - MPI_Initialized(&initialized); - MPI_Finalized(&finalized); - return initialized && !finalized; -#else - return true; -#endif + return MPI::isInitialized(); } static int GetRank(CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup"); - int rank; - MPI_Comm_rank(group,&rank); - return rank; -#else - return 0; -#endif + return MPI::GetRank( group ); } static int GetSize(CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup"); - int size; - MPI_Comm_size(group,&size); - return size; -#else - return 1; -#endif + return MPI::GetSize( group ); } -#ifdef HAVE_MPI - template< typename T > - static MPI_Datatype getDataType( const T& t ) - { - return MPITypeResolver< T >::getType(); - } -#endif - //dim-number of dimensions, distr array of guess distr - 0 for computation //distr array will be filled by computed distribution //more information in MPI documentation @@ -291,78 +185,42 @@ class MpiCommunicator static void Barrier( CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup"); - MPI_Barrier(group); -#endif + MPI::Barrier( group ); } template <typename T> static void Send( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "Send cannot be called with NullGroup"); - MPI_Send( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group ); -#endif + MPI::Send( data, count, dest, tag, group ); } template <typename T> static void Recv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "Recv cannot be called with NullGroup"); - MPI_Status status; - MPI_Recv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &status ); -#endif - } + MPI::Recv( data, count, src, tag, group ); + } template <typename T> static Request ISend( const T* data, int count, int dest, int tag, CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup"); - Request req; - MPI_Isend( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType(), dest, tag, group, &req); - return req; -#else - return 1; -#endif + return MPI::Isend( data, count, dest, tag, group ); } template <typename T> static Request IRecv( T* data, int count, int src, int tag, CommunicationGroup group = AllGroup ) { -#ifdef HAVE_MPI - 
TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup"); - Request req; - MPI_Irecv( const_cast< void* >( ( const void* ) data ), count, MPITypeResolver< T >::getType() , src, tag, group, &req); - return req; -#else - return 1; -#endif + return MPI::Irecv( data, count, src, tag, group ); } static void WaitAll(Request *reqs, int length) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE); -#endif + MPI::Waitall( reqs, length ); } template< typename T > static void Bcast( T* data, int count, int root, CommunicationGroup group) { -#ifdef HAVE_MPI - TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); - TNL_ASSERT_NE(group, NullGroup, "BCast cannot be called with NullGroup"); - MPI_Bcast((void*) data, count, MPITypeResolver< T >::getType(), root, group); -#endif + MPI::Bcast( data, count, root, group ); } template< typename T > @@ -372,12 +230,7 @@ class MpiCommunicator const MPI_Op &op, CommunicationGroup group) { -#ifdef HAVE_MPI - TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); - MPI_Allreduce( const_cast< void* >( ( void* ) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,group); -#else - memcpy( ( void* ) reduced_data, ( const void* ) data, count * sizeof( T ) ); -#endif + MPI::Allreduce( data, reduced_data, count, op, group ); } // in-place variant of Allreduce @@ -387,27 +240,18 @@ class MpiCommunicator const MPI_Op &op, CommunicationGroup group) { -#ifdef HAVE_MPI - TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); - MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPITypeResolver< T >::getType(),op,group); -#endif + MPI::Allreduce( data, count, op, group ); } - template< typename T > static void Reduce( const T* data, T* reduced_data, int count, - MPI_Op &op, + const MPI_Op &op, int root, CommunicationGroup group) { -#ifdef HAVE_MPI - TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup"); - MPI_Reduce( const_cast< void* >( ( void*) data ), (void*) reduced_data,count,MPITypeResolver< T >::getType(),op,root,group); -#else - memcpy( ( void* ) reduced_data, ( void* ) data, count * sizeof( T ) ); -#endif + MPI::Reduce( data, reduced_data, count, op, root, group ); } template< typename T > @@ -421,24 +265,7 @@ class MpiCommunicator int receiveTag, CommunicationGroup group ) { -#ifdef HAVE_MPI - TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup"); - MPI_Status status; - MPI_Sendrecv( const_cast< void* >( ( void* ) sendData ), - sendCount, - MPITypeResolver< T >::getType(), - destination, - sendTag, - ( void* ) receiveData, - receiveCount, - MPITypeResolver< T >::getType(), - source, - receiveTag, - group, - &status ); -#else - throw Exceptions::MPISupportMissing(); -#endif + MPI::Sendrecv( sendData, sendCount, destination, sendTag, receiveData, receiveCount, source, receiveTag, group ); } template< typename T > @@ -448,19 +275,7 @@ class MpiCommunicator int receiveCount, CommunicationGroup group ) { -#ifdef HAVE_MPI - TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup"); - MPI_Alltoall( const_cast< void* >( ( void* ) sendData ), - sendCount, - MPITypeResolver< T >::getType(), - ( void* ) receiveData, - receiveCount, - MPITypeResolver< T >::getType(), - group ); -#else - TNL_ASSERT_EQ( sendCount, 
receiveCount, "sendCount must be equal to receiveCount when running without MPI." ); - memcpy( (void*) receiveData, (const void*) sendData, sendCount * sizeof( T ) ); -#endif + MPI::Alltoall( sendData, sendCount, receiveData, receiveCount, group ); } @@ -485,58 +300,16 @@ class MpiCommunicator } #ifdef HAVE_MPI - static MPI_Request NullRequest; static MPI_Comm AllGroup; static MPI_Comm NullGroup; #else - static constexpr int NullRequest = -1; static constexpr int AllGroup = 1; static constexpr int NullGroup = 0; #endif private: - - static void selectGPU(void) - { -#ifdef HAVE_MPI - #ifdef HAVE_CUDA - const int count = GetSize(AllGroup); - const int rank = GetRank(AllGroup); - int gpuCount; - cudaGetDeviceCount(&gpuCount); - - procName names[count]; - - int i=0; - int len; - MPI_Get_processor_name(names[rank].name, &len); - - for(i=0;i<count;i++) - std::memcpy(names[i].name,names[rank].name,len+1); - - MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, - (void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, - MPI_COMM_WORLD); - - int nodeRank=0; - for(i=0;i<rank;i++) - { - if(std::strcmp(names[rank].name,names[i].name)==0) - nodeRank++; - } - - const int gpuNumber = nodeRank % gpuCount; - - cudaSetDevice(gpuNumber); - TNL_CHECK_CUDA_DEVICE; - - //std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl; - #endif -#endif - } }; #ifdef HAVE_MPI -MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL; MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD; MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL; #endif diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp index cd0eb49d5ed4a5a688dd53dfc4caf7444aebdcc2..61dc3eda08341bde0d2a59da07e0f8e8e1954824 100644 --- a/src/TNL/Containers/DistributedArray.hpp +++ b/src/TNL/Containers/DistributedArray.hpp @@ -15,7 +15,6 @@ #include "DistributedArray.h" #include <TNL/Algorithms/ParallelFor.h> -#include <TNL/Communicators/MpiDefs.h> // important only when MPI is disabled namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Expressions/DistributedComparison.h b/src/TNL/Containers/Expressions/DistributedComparison.h index 1cef0873dd19d507f6c1bde8d03deac33d22de1d..2695ccccc3eab0d2a907c0cc4b6989ee36c2c6ba 100644 --- a/src/TNL/Containers/Expressions/DistributedComparison.h +++ b/src/TNL/Containers/Expressions/DistributedComparison.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Containers/Expressions/ExpressionVariableType.h> -#include <TNL/Communicators/MpiDefs.h> +#include <TNL/MPI/DummyDefs.h> namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h index b525e8a5398001998955f2cdcfa40fdcb0891b05..f55ae3d4ae34c021969117eaa7d1a7f9d485e88e 100644 --- a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h +++ b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Containers/Expressions/VerticalOperations.h> -#include <TNL/Communicators/MpiDefs.h> +#include <TNL/MPI/DummyDefs.h> namespace TNL { namespace Containers { diff --git a/src/TNL/MPI.h b/src/TNL/MPI.h new file mode 100644 index 0000000000000000000000000000000000000000..b1b7dd69884de7be3afed9e6893346116c0d88b6 --- /dev/null +++ b/src/TNL/MPI.h @@ -0,0 +1,29 @@ +/*************************************************************************** + MPI.h - description + ------------------- + begin : Dec 29, 2020 + copyright : (C) 2020 by Tomas Oberhuber 
et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +/** + * \brief A convenient header file which includes all headers from the + * `TNL/MPI/` subdirectory. + * + * Users may use this to avoid having to include many header files in their + * projects. On the other hand, parts of the TNL library should generally + * include only the specific headers they need, in order to avoid cycles in + * the header inclusion. + */ + +#include "MPI/DummyDefs.h" +#include "MPI/getDataType.h" +#include "MPI/selectGPU.h" +#include "MPI/Wrappers.h" +#include "MPI/Utils.h" +#include "MPI/ScopedInitializer.h" +#include "MPI/Print.h" diff --git a/src/TNL/Communicators/MpiDefs.h b/src/TNL/MPI/DummyDefs.h similarity index 64% rename from src/TNL/Communicators/MpiDefs.h rename to src/TNL/MPI/DummyDefs.h index df43005ec262e4ead2628f4e93e8f12667367ef1..cdd5ea48383738ffd085326a35deb24380816725 100644 --- a/src/TNL/Communicators/MpiDefs.h +++ b/src/TNL/MPI/DummyDefs.h @@ -1,8 +1,8 @@ /*************************************************************************** - MpiCommunicator.h - description + MPI/DummyDefs.h - description ------------------- - begin : 2005/04/23 - copyright : (C) 2005 by Tomas Oberhuber + begin : Dec 29, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -11,6 +11,9 @@ #pragma once #ifndef HAVE_MPI +using MPI_Request = int; +using MPI_Comm = int; + enum MPI_Op { MPI_MAX, MPI_MIN, @@ -28,9 +31,9 @@ enum MPI_Op { // MPI_Init_thread constants enum { - MPI_THREAD_SINGLE, - MPI_THREAD_FUNNELED, - MPI_THREAD_SERIALIZED, - MPI_THREAD_MULTIPLE + MPI_THREAD_SINGLE, + MPI_THREAD_FUNNELED, + MPI_THREAD_SERIALIZED, + MPI_THREAD_MULTIPLE }; #endif diff --git a/src/TNL/Communicators/MPIPrint.h b/src/TNL/MPI/Print.h similarity index 75% rename from src/TNL/Communicators/MPIPrint.h rename to src/TNL/MPI/Print.h index 6d78eafaf8c67c1c770faf01fa879d4b31b4032a..5cd4819a2951cf46093eecb6ab5052a7b278e155 100644 --- a/src/TNL/Communicators/MPIPrint.h +++ b/src/TNL/MPI/Print.h @@ -1,8 +1,8 @@ /*************************************************************************** - MPIPrint.h - description + MPI/Print.h - description ------------------- begin : Feb 7, 2019 - copyright : (C) 2019 by Tomas Oberhuber + copyright : (C) 2019 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -10,34 +10,35 @@ #pragma once +#include <iostream> #include <sstream> -#include <TNL/Communicators/MpiCommunicator.h> + +#include <TNL/String.h> +#include <TNL/MPI/Wrappers.h> #ifdef HAVE_MPI #define TNL_MPI_PRINT( message ) \ -if( ! TNL::Communicators::MpiCommunicator::IsInitialized() ) \ +if( ! 
TNL::MPI::Initialized() || TNL::MPI::Finalized() ) \ std::cerr << message << std::endl; \ else \ { \ - if( TNL::Communicators::MpiCommunicator::GetRank() > 0 ) \ + if( TNL::MPI::GetRank() > 0 ) \ { \ std::stringstream __tnl_mpi_print_stream_; \ - __tnl_mpi_print_stream_ << "Node " << TNL::Communicators::MpiCommunicator::GetRank() << " of " \ - << TNL::Communicators::MpiCommunicator::GetSize() << " : " << message << std::endl; \ + __tnl_mpi_print_stream_ << "Node " << TNL::MPI::GetRank() << " of " << TNL::MPI::GetSize() << " : " \ + << message << std::endl; \ TNL::String __tnl_mpi_print_string_( __tnl_mpi_print_stream_.str() ); \ mpiSend( __tnl_mpi_print_string_, 0, std::numeric_limits< int >::max() ); \ } \ else \ { \ - std::cerr << "Node 0 of " << TNL::Communicators::MpiCommunicator::GetSize() << " : " << message << std::endl; \ - for( int __tnl_mpi_print_j = 1; \ - __tnl_mpi_print_j < TNL::Communicators::MpiCommunicator::GetSize(); \ - __tnl_mpi_print_j++ ) \ - { \ - TNL::String __tnl_mpi_print_string_; \ - mpiReceive( __tnl_mpi_print_string_, __tnl_mpi_print_j, std::numeric_limits< int >::max() ); \ - std::cerr << __tnl_mpi_print_string_; \ - } \ + std::cerr << "Node 0 of " << TNL::MPI::GetSize() << " : " << message << std::endl; \ + for( int __tnl_mpi_print_j = 1; __tnl_mpi_print_j < TNL::MPI::GetSize(); __tnl_mpi_print_j++ ) \ + { \ + TNL::String __tnl_mpi_print_string_; \ + mpiReceive( __tnl_mpi_print_string_, __tnl_mpi_print_j, std::numeric_limits< int >::max() ); \ + std::cerr << __tnl_mpi_print_string_; \ + } \ } \ } #else @@ -47,11 +48,11 @@ else #ifdef HAVE_MPI #define TNL_MPI_PRINT_MASTER( message ) \ -if( ! TNL::Communicators::MpiCommunicator::IsInitialized() ) \ +if( ! TNL::MPI::Initialized() || TNL::MPI::Finalized() ) \ std::cerr << message << std::endl; \ else \ { \ - if( TNL::Communicators::MpiCommunicator::GetRank() == 0 ) \ + if( TNL::MPI::GetRank() == 0 ) \ { \ std::cerr << "Master node : " << message << std::endl; \ } \ @@ -63,20 +64,20 @@ else #ifdef HAVE_MPI #define TNL_MPI_PRINT_COND( condition, message ) \ -if( ! TNL::Communicators::MpiCommunicator::IsInitialized() ) \ +if( ! 
TNL::MPI::Initialized() || TNL::MPI::Finalized() ) \ { \ if( condition) std::cerr << message << std::endl; \ } \ else \ { \ - if( TNL::Communicators::MpiCommunicator::GetRank() > 0 ) \ + if( TNL::MPI::GetRank() > 0 ) \ { \ int __tnl_mpi_print_cnd = ( condition ); \ - TNL::Communicators::MpiCommunicator::Send( &__tnl_mpi_print_cnd, 1, 0, 0 ); \ + TNL::MPI::Send( &__tnl_mpi_print_cnd, 1, 0, 0 ); \ if( condition ) { \ std::stringstream __tnl_mpi_print_stream_; \ - __tnl_mpi_print_stream_ << "Node " << TNL::Communicators::MpiCommunicator::GetRank() << " of " \ - << TNL::Communicators::MpiCommunicator::GetSize() << " : " << message << std::endl; \ + __tnl_mpi_print_stream_ << "Node " << TNL::MPI::GetRank() << " of " << TNL::MPI::GetSize() << " : " \ + << message << std::endl; \ TNL::String __tnl_mpi_print_string_( __tnl_mpi_print_stream_.str() ); \ mpiSend( __tnl_mpi_print_string_, 0, std::numeric_limits< int >::max() ); \ } \ @@ -84,13 +85,11 @@ else else \ { \ if( condition ) \ - std::cerr << "Node 0 of " << TNL::Communicators::MpiCommunicator::GetSize() << " : " << message << std::endl; \ - for( int __tnl_mpi_print_j = 1; \ - __tnl_mpi_print_j < TNL::Communicators::MpiCommunicator::GetSize(); \ - __tnl_mpi_print_j++ ) \ + std::cerr << "Node 0 of " << TNL::MPI::GetSize() << " : " << message << std::endl; \ + for( int __tnl_mpi_print_j = 1; __tnl_mpi_print_j < TNL::MPI::GetSize(); __tnl_mpi_print_j++ ) \ { \ int __tnl_mpi_print_cond; \ - TNL::Communicators::MpiCommunicator::Recv( &__tnl_mpi_print_cond, 1, __tnl_mpi_print_j, 0 ); \ + TNL::MPI::Recv( &__tnl_mpi_print_cond, 1, __tnl_mpi_print_j, 0 ); \ if( __tnl_mpi_print_cond ) \ { \ TNL::String __tnl_mpi_print_string_; \ diff --git a/src/TNL/Communicators/ScopedInitializer.h b/src/TNL/MPI/ScopedInitializer.h similarity index 72% rename from src/TNL/Communicators/ScopedInitializer.h rename to src/TNL/MPI/ScopedInitializer.h index 2970bc628319bdf9d4c40d7a2cb32694a8148f7d..82ba02bc5743611bfb4af7395142de730672d548 100644 --- a/src/TNL/Communicators/ScopedInitializer.h +++ b/src/TNL/MPI/ScopedInitializer.h @@ -12,22 +12,25 @@ #pragma once +#include "Wrappers.h" +#include "Utils.h" + namespace TNL { -namespace Communicators { +namespace MPI { -template< typename Communicator > struct ScopedInitializer { - ScopedInitializer( int& argc, char**& argv ) + ScopedInitializer( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE ) { - Communicator::Init( argc, argv ); + Init( argc, argv ); } ~ScopedInitializer() { - Communicator::Finalize(); + restoreRedirection(); + Finalize(); } }; -} // namespace Communicators +} // namespace MPI } // namespace TNL diff --git a/src/TNL/MPI/Utils.h b/src/TNL/MPI/Utils.h new file mode 100644 index 0000000000000000000000000000000000000000..b655aefd0087ca4cc030a57c5f3291570075ce65 --- /dev/null +++ b/src/TNL/MPI/Utils.h @@ -0,0 +1,46 @@ +/*************************************************************************** + MPI/Wrappers.h - description + ------------------- + begin : Apr 23, 2005 + copyright : (C) 2005 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Debugging/OutputRedirection.h> + +#include "Wrappers.h" + +namespace TNL { +namespace MPI { + +inline bool isInitialized() +{ + return Initialized() && ! 
Finalized(); +} + +inline void setupRedirection( std::string outputDirectory ) +{ +#ifdef HAVE_MPI + if( GetSize() > 1 && GetRank() != 0 ) { + const std::string stdoutFile = outputDirectory + "/stdout_" + std::to_string(GetRank()) + ".txt"; + const std::string stderrFile = outputDirectory + "/stderr_" + std::to_string(GetRank()) + ".txt"; + std::cout << GetRank() << ": Redirecting stdout and stderr to files " << stdoutFile << " and " << stderrFile << std::endl; + Debugging::redirect_stdout_stderr( stdoutFile, stderrFile ); + } +#endif +} + +// restore redirection (usually not necessary, it uses RAII internally...) +inline void restoreRedirection() +{ + if( GetSize() > 1 && GetRank() != 0 ) { + Debugging::redirect_stdout_stderr( "", "", true ); + } +} + +} // namespace MPI +} // namespace TNL diff --git a/src/TNL/MPI/Wrappers.h b/src/TNL/MPI/Wrappers.h new file mode 100644 index 0000000000000000000000000000000000000000..9a057da5f56d2114392f63f5b0ef5ad96c7e2c5d --- /dev/null +++ b/src/TNL/MPI/Wrappers.h @@ -0,0 +1,347 @@ +/*************************************************************************** + MPI/Wrappers.h - description + ------------------- + begin : Apr 23, 2005 + copyright : (C) 2005 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <iostream> + +#ifdef HAVE_MPI + #include <mpi.h> +#else + #include "DummyDefs.h" + #include <cstring> // std::memcpy + #include <TNL/Exceptions/MPISupportMissing.h> +#endif + +#include <TNL/Assert.h> +#include "getDataType.h" +#include "selectGPU.h" + +namespace TNL { +namespace MPI { + +// function wrappers for MPI constants + +inline MPI_Comm AllGroup() +{ +#ifdef HAVE_MPI + return MPI_COMM_WORLD; +#else + return 1; +#endif +} + +inline MPI_Comm NullGroup() +{ +#ifdef HAVE_MPI + return MPI_COMM_NULL; +#else + return 0; +#endif +} + +inline MPI_Request NullRequest() +{ +#ifdef HAVE_MPI + return MPI_REQUEST_NULL; +#else + return 0; +#endif +} + +// wrappers for basic MPI functions + +inline void Init( int& argc, char**& argv, int required_thread_level = MPI_THREAD_SINGLE ) +{ +#ifdef HAVE_MPI + switch( required_thread_level ) { + case MPI_THREAD_SINGLE: + case MPI_THREAD_FUNNELED: + case MPI_THREAD_SERIALIZED: + case MPI_THREAD_MULTIPLE: + break; + default: + std::cerr << "ERROR: invalid argument for the 'required' thread level support: " << required_thread_level << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int provided; + MPI_Init_thread( &argc, &argv, required_thread_level, &provided ); + if( provided < required_thread_level ) { + const char* level = ""; + switch( required_thread_level ) { + case MPI_THREAD_SINGLE: + level = "MPI_THREAD_SINGLE"; + break; + case MPI_THREAD_FUNNELED: + level = "MPI_THREAD_FUNNELED"; + break; + case MPI_THREAD_SERIALIZED: + level = "MPI_THREAD_SERIALIZED"; + break; + case MPI_THREAD_MULTIPLE: + level = "MPI_THREAD_MULTIPLE"; + break; + } + std::cerr << "ERROR: The MPI library does not have the required level of thread support: " << level << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + selectGPU(); +#endif +} + +inline void Finalize() +{ +#ifdef HAVE_MPI + MPI_Finalize(); +#endif +} + +inline bool Initialized() +{ +#ifdef HAVE_MPI + int flag; + MPI_Initialized(&flag); + return flag; +#else + return true; +#endif +} + +inline bool Finalized() +{ +#ifdef HAVE_MPI + int flag; + MPI_Finalized(&flag); + return flag; +#else + return false; 
+#endif +} + +inline int GetRank( MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "GetRank cannot be called with NullGroup" ); + int rank; + MPI_Comm_rank( group, &rank ); + return rank; +#else + return 0; +#endif +} + +inline int GetSize( MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "GetSize cannot be called with NullGroup" ); + int size; + MPI_Comm_size( group, &size ); + return size; +#else + return 1; +#endif +} + +// wrappers for MPI communication functions + +inline void Barrier( MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Barrier cannot be called with NullGroup" ); + MPI_Barrier(group); +#endif +} + +inline void Waitall( MPI_Request* reqs, int length ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + MPI_Waitall( length, reqs, MPI_STATUSES_IGNORE ); +#endif +} + +template< typename T > +void Send( const T* data, + int count, + int dest, + int tag, + MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Send cannot be called with NullGroup" ); + MPI_Send( (const void*) data, count, getDataType<T>(), dest, tag, group ); +#endif +} + +template< typename T > +void Recv( T* data, + int count, + int src, + int tag, + MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Recv cannot be called with NullGroup" ); + MPI_Recv( (void*) data, count, getDataType<T>(), src, tag, group, MPI_STATUS_IGNORE ); +#endif +} + +template< typename T > +void Sendrecv( const T* sendData, + int sendCount, + int destination, + int sendTag, + T* receiveData, + int receiveCount, + int source, + int receiveTag, + MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Sendrecv cannot be called with NullGroup" ); + MPI_Sendrecv( (void*) sendData, + sendCount, + getDataType<T>(), + destination, + sendTag, + (void*) receiveData, + receiveCount, + getDataType<T>(), + source, + receiveTag, + group, + MPI_STATUS_IGNORE ); +#else + throw Exceptions::MPISupportMissing(); +#endif +} + +template< typename T > +MPI_Request Isend( const T* data, + int count, + int dest, + int tag, + MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Isend cannot be called with NullGroup" ); + MPI_Request req; + MPI_Isend( (const void*) data, count, getDataType<T>(), dest, tag, group, &req ); + return req; +#else + return NullRequest(); +#endif +} + +template< typename T > +MPI_Request Irecv( T* data, + int count, + int src, + int tag, + MPI_Comm group = AllGroup() ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! 
Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Irecv cannot be called with NullGroup" ); + MPI_Request req; + MPI_Irecv( (void*) data, count, getDataType<T>(), src, tag, group, &req ); + return req; +#else + return NullRequest(); +#endif +} + +template< typename T > +void Allreduce( const T* data, + T* reduced_data, + int count, + const MPI_Op& op, + MPI_Comm group) +{ +#ifdef HAVE_MPI + TNL_ASSERT_NE( group, NullGroup(), "Allreduce cannot be called with NullGroup" ); + MPI_Allreduce( (const void*) data, (void*) reduced_data, count, getDataType<T>(), op, group ); +#else + std::memcpy( (void*) reduced_data, (const void*) data, count * sizeof(T) ); +#endif +} + +// in-place variant of Allreduce +template< typename T > +void Allreduce( T* data, + int count, + const MPI_Op& op, + MPI_Comm group) +{ +#ifdef HAVE_MPI + TNL_ASSERT_NE( group, NullGroup(), "Allreduce cannot be called with NullGroup" ); + MPI_Allreduce( MPI_IN_PLACE, (void*) data, count, getDataType<T>(), op, group ); +#endif +} + +template< typename T > +void Reduce( const T* data, + T* reduced_data, + int count, + const MPI_Op& op, + int root, + MPI_Comm group) +{ +#ifdef HAVE_MPI + TNL_ASSERT_NE( group, NullGroup(), "Reduce cannot be called with NullGroup" ); + MPI_Reduce( (const void*) data, (void*) reduced_data, count, getDataType<T>(), op, root, group ); +#else + std::memcpy( (void*) reduced_data, (void*) data, count * sizeof(T) ); +#endif +} + +template< typename T > +void Bcast( T* data, int count, int root, MPI_Comm group) +{ +#ifdef HAVE_MPI + TNL_ASSERT_TRUE( Initialized() && ! Finalized(), "Fatal Error - MPI is not initialized" ); + TNL_ASSERT_NE( group, NullGroup(), "Bcast cannot be called with NullGroup" ); + MPI_Bcast( (void*) data, count, getDataType<T>(), root, group ); +#endif +} + +template< typename T > +void Alltoall( const T* sendData, + int sendCount, + T* receiveData, + int receiveCount, + MPI_Comm group ) +{ +#ifdef HAVE_MPI + TNL_ASSERT_NE( group, NullGroup(), "Alltoall cannot be called with NullGroup" ); + MPI_Alltoall( (const void*) sendData, + sendCount, + getDataType<T>(), + (void*) receiveData, + receiveCount, + getDataType<T>(), + group ); +#else + TNL_ASSERT_EQ( sendCount, receiveCount, "sendCount must be equal to receiveCount when running without MPI." ); + std::memcpy( (void*) receiveData, (const void*) sendData, sendCount * sizeof(T) ); +#endif +} + +} // namespace MPI +} // namespace TNL diff --git a/src/TNL/MPI/getDataType.h b/src/TNL/MPI/getDataType.h new file mode 100644 index 0000000000000000000000000000000000000000..f3570679bf2708cca08de3e485890588396a051e --- /dev/null +++ b/src/TNL/MPI/getDataType.h @@ -0,0 +1,119 @@ +/*************************************************************************** + getDataType.h - description + ------------------- + begin : Feb 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#ifdef HAVE_MPI + #include <mpi.h> +#endif + +namespace TNL { +namespace MPI { + +#ifdef HAVE_MPI +template< typename T > +struct TypeResolver +{ + static inline MPI_Datatype getType() + { + static_assert( sizeof(T) == sizeof(char) || + sizeof(T) == sizeof(int) || + sizeof(T) == sizeof(short int) || + sizeof(T) == sizeof(long int), + "Fatal Error - Unknown MPI Type"); + switch( sizeof(T) ) + { + case sizeof(char): + return MPI_CHAR; + case sizeof(int): + return MPI_INT; + case sizeof(short int): + return MPI_SHORT; + case sizeof(long int): + return MPI_LONG; + } + // This will never happen thanks to the static_assert above, but icpc is + // not that smart and complains about missing return statement at the end + // of non-void function. + throw 0; + } +}; + +template<> struct TypeResolver< char > +{ + static inline MPI_Datatype getType(){return MPI_CHAR;}; +}; + +template<> struct TypeResolver< int > +{ + static inline MPI_Datatype getType(){return MPI_INT;}; +}; + +template<> struct TypeResolver< short int > +{ + static inline MPI_Datatype getType(){return MPI_SHORT;}; +}; + +template<> struct TypeResolver< long int > +{ + static inline MPI_Datatype getType(){return MPI_LONG;}; +}; + +template<> struct TypeResolver< unsigned char > +{ + static inline MPI_Datatype getType(){return MPI_UNSIGNED_CHAR;}; +}; + +template<> struct TypeResolver< unsigned short int > +{ + static inline MPI_Datatype getType(){return MPI_UNSIGNED_SHORT;}; +}; + +template<> struct TypeResolver< unsigned int > +{ + static inline MPI_Datatype getType(){return MPI_UNSIGNED;}; +}; + +template<> struct TypeResolver< unsigned long int > +{ + static inline MPI_Datatype getType(){return MPI_UNSIGNED_LONG;}; +}; + +template<> struct TypeResolver< float > +{ + static inline MPI_Datatype getType(){return MPI_FLOAT;}; +}; + +template<> struct TypeResolver< double > +{ + static inline MPI_Datatype getType(){return MPI_DOUBLE;}; +}; + +template<> struct TypeResolver< long double > +{ + static inline MPI_Datatype getType(){return MPI_LONG_DOUBLE;}; +}; + +template<> struct TypeResolver< bool > +{ + // sizeof(bool) is implementation-defined: https://stackoverflow.com/a/4897859 + static_assert( sizeof(bool) == 1, "The systems where sizeof(bool) != 1 are not supported by MPI." ); + static inline MPI_Datatype getType() { return MPI_C_BOOL; }; +}; + +template< typename T > +MPI_Datatype getDataType( const T& = T{} ) +{ + return TypeResolver< T >::getType(); +} +#endif + +} // namespace MPI +} // namespace TNL diff --git a/src/TNL/MPI/selectGPU.h b/src/TNL/MPI/selectGPU.h new file mode 100644 index 0000000000000000000000000000000000000000..def9a329f91029061b82d32067ee0d44c1876c94 --- /dev/null +++ b/src/TNL/MPI/selectGPU.h @@ -0,0 +1,72 @@ +/*************************************************************************** + MPI/Wrappers.h - description + ------------------- + begin : Apr 23, 2005 + copyright : (C) 2005 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <cstring> + +#include <TNL/Cuda/CheckDevice.h> + +namespace TNL { +namespace MPI { +namespace { + +#ifdef HAVE_MPI +#ifdef HAVE_CUDA + typedef struct __attribute__((__packed__)) { + char name[MPI_MAX_PROCESSOR_NAME]; + } procName; +#endif +#endif + +inline void selectGPU() +{ +#ifdef HAVE_MPI +#ifdef HAVE_CUDA + int size; + MPI_Comm_size( MPI_COMM_WORLD, &size ); + int rank; + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + int gpuCount; + cudaGetDeviceCount( &gpuCount ); + + procName names[size]; + + int i=0; + int len; + MPI_Get_processor_name(names[rank].name, &len); + + for(i=0;i<size;i++) + std::memcpy(names[i].name,names[rank].name,len+1); + + MPI_Alltoall( (void*)names ,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, + (void*)names,MPI_MAX_PROCESSOR_NAME,MPI_CHAR, + MPI_COMM_WORLD); + + int nodeRank=0; + for(i=0;i<rank;i++) + { + if(std::strcmp(names[rank].name,names[i].name)==0) + nodeRank++; + } + + const int gpuNumber = nodeRank % gpuCount; + + cudaSetDevice(gpuNumber); + TNL_CHECK_CUDA_DEVICE; + + //std::cout<<"Node: " << rank << " gpu: " << gpuNumber << std::endl; +#endif +#endif +} + +} // namespace <unnamed> +} // namespace MPI +} // namespace TNL diff --git a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h index 6030b976f038ab290ada814575db1bfb444ce694..04647cb4af883d07bf00e6ef177a8205c5be559c 100644 --- a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h +++ b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h @@ -12,7 +12,6 @@ #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/StaticVector.h> -#include <TNL/Communicators/MPIPrint.h> namespace TNL { namespace Meshes { diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h index 60605c6eb07514359e799026b73416eb404d9d95..99f505bba77467b5d4c669cf09955a6752014898 100644 --- a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h +++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h @@ -12,6 +12,7 @@ #include <TNL/Functions/MeshFunction.h> #include <TNL/Functions/MeshFunctionView.h> +#include <TNL/MPI/getDataType.h> namespace TNL { namespace Meshes { @@ -19,7 +20,7 @@ namespace DistributedMeshes { /* - * This variant cerate copy of MeshFunction but smaller, reduced to local entities, without overlap. + * This variant cerate copy of MeshFunction but smaller, reduced to local entities, without overlap. 
* It is slow and has high RAM consumption */ template< typename MeshFunction, @@ -88,8 +89,8 @@ class DistributedGridIO< return true; }; - - static bool load(const String& fileName,MeshFunctionType &meshFunction) + + static bool load(const String& fileName,MeshFunctionType &meshFunction) { auto *distrGrid=meshFunction.getMesh().getDistributedMesh(); if(distrGrid==NULL) //not distributed @@ -99,10 +100,10 @@ class DistributedGridIO< } const MeshType& mesh=meshFunction.getMesh(); - + PointType spaceSteps=mesh.getSpaceSteps(); PointType origin=mesh.getOrigin(); - + CoordinatesType localSize=distrGrid->getLocalSize(); CoordinatesType localBegin=distrGrid->getLocalBegin(); @@ -111,33 +112,33 @@ class DistributedGridIO< newMesh->setSpaceSteps(spaceSteps); CoordinatesType newOrigin; newMesh->setOrigin(origin+spaceSteps*localBegin); - + VectorType newDof(newMesh-> template getEntitiesCount< typename MeshType::Cell >()); MeshFunctionType newMeshFunction; - newMeshFunction.bind(newMesh,newDof); + newMeshFunction.bind(newMesh,newDof); CoordinatesType zeroCoord; - zeroCoord.setValue(0); + zeroCoord.setValue(0); File file; file.open( fileName+String("-")+distrGrid->printProcessCoords()+String(".tnl"), std::ios_base::in ); newMeshFunction.boundLoad(file); file.close(); CopyEntitiesHelper<MeshFunctionType>::Copy(newMeshFunction,meshFunction,zeroCoord,localBegin,localSize); - + return true; }; - + }; /* - * Save distributed data into single file without overlaps using MPIIO and MPI datatypes, + * Save distributed data into single file without overlaps using MPIIO and MPI datatypes, * EXPLOSIVE: works with only Grids and MPI * BAD IMPLEMENTTION creating MPI-Types at every save! -- I dont want contamine more places by MPI.. */ #ifdef HAVE_MPI -template<typename MeshFunctionType> +template<typename MeshFunctionType> class DistributedGridIO_MPIIOBase { public: @@ -152,7 +153,7 @@ class DistributedGridIO_MPIIOBase static bool save(const String& fileName, MeshFunctionType &meshFunction, RealType *data) { auto *distrGrid=meshFunction.getMesh().getDistributedMesh(); - + if(distrGrid==NULL) //not distributed { meshFunction.save(fileName); @@ -168,7 +169,7 @@ class DistributedGridIO_MPIIOBase &file); if( ok != 0 ) throw std::runtime_error("Open file falied"); - + int written=save(file,meshFunction, data,0); MPI_File_close(&file); @@ -176,7 +177,7 @@ class DistributedGridIO_MPIIOBase return written>0; }; - + static int save(MPI_File &file, MeshFunctionType &meshFunction, RealType *data, int offset) { @@ -187,7 +188,7 @@ class DistributedGridIO_MPIIOBase int dataCount=CreateDataTypes(distrGrid,&ftype,&atype); int headerSize; - + MPI_File_set_view(file,0,MPI_BYTE,MPI_BYTE,"native",MPI_INFO_NULL); if(Communicators::MpiCommunicator::GetRank(group)==0) @@ -200,9 +201,9 @@ class DistributedGridIO_MPIIOBase offset +=headerSize; MPI_File_set_view(file,offset, - Communicators::MPITypeResolver<RealType>::getType(), + TNL::MPI::getDataType<RealType>(), ftype,"native",MPI_INFO_NULL); - + MPI_Status wstatus; MPI_File_write(file,data,1,atype,&wstatus); @@ -222,7 +223,7 @@ class DistributedGridIO_MPIIOBase int fstarts[dim]; int flsize[dim]; int fgsize[dim]; - + hackArray(dim,fstarts,distrGrid->getGlobalBegin().getData()); hackArray(dim,flsize,distrGrid->getLocalSize().getData()); hackArray(dim,fgsize,distrGrid->getGlobalSize().getData()); @@ -230,14 +231,14 @@ class DistributedGridIO_MPIIOBase MPI_Type_create_subarray(dim, fgsize,flsize,fstarts, MPI_ORDER_C, - Communicators::MPITypeResolver<RealType>::getType(), + 
TNL::MPI::getDataType<RealType>(), ftype); MPI_Type_commit(ftype); int agsize[dim]; int alsize[dim]; - int astarts[dim]; + int astarts[dim]; hackArray(dim,astarts,distrGrid->getLocalBegin().getData()); hackArray(dim,alsize,distrGrid->getLocalSize().getData()); @@ -246,7 +247,7 @@ class DistributedGridIO_MPIIOBase MPI_Type_create_subarray(dim, agsize,alsize,astarts, MPI_ORDER_C, - Communicators::MPITypeResolver<RealType>::getType(), + TNL::MPI::getDataType<RealType>(), atype); MPI_Type_commit(atype); @@ -350,9 +351,9 @@ class DistributedGridIO_MPIIOBase MPI_File_close(&file); return ret; } - + /* Funky bomb - no checks - only dirty load */ - static int load(MPI_File &file,MeshFunctionType &meshFunction, RealType* data, int offset ) + static int load(MPI_File &file,MeshFunctionType &meshFunction, RealType* data, int offset ) { auto *distrGrid=meshFunction.getMesh().getDistributedMesh(); @@ -360,7 +361,7 @@ class DistributedGridIO_MPIIOBase MPI_Datatype ftype; MPI_Datatype atype; int dataCount=CreateDataTypes(distrGrid,&ftype,&atype); - + MPI_File_set_view(file,0,MPI_BYTE,MPI_BYTE,"native",MPI_INFO_NULL); int headerSize=0; @@ -371,18 +372,18 @@ class DistributedGridIO_MPIIOBase headerSize=readMeshFunctionHeader(file,meshFunction,dataCount); } MPI_Bcast(&headerSize, 1, MPI_INT,0, group); - + if(headerSize<0) return false; offset+=headerSize; MPI_File_set_view(file,offset, - Communicators::MPITypeResolver<RealType>::getType(), + TNL::MPI::getDataType<RealType>(), ftype,"native",MPI_INFO_NULL); MPI_Status wstatus; MPI_File_read(file,(void*)data,1,atype,&wstatus); - + MPI_Type_free(&atype); MPI_Type_free(&ftype); @@ -412,7 +413,7 @@ class DistributedGridIO_MPIIOBase size+=count*sizeof(char); MPI_File_read(file, (void *)&count,1, MPI_INT, &rstatus);//DATACOUNT size+=1*sizeof(int); - + if(count!=length) { std::cerr<<"Chyba naÄŤĂtánĂ MeshFunction, dĂ©lka dat v souboru neodpovĂdá oÄŤekávanĂ© dĂ©lce" << std::endl; @@ -421,7 +422,7 @@ class DistributedGridIO_MPIIOBase return size; }; - + }; #endif @@ -444,10 +445,10 @@ class DistributedGridIO< #ifdef HAVE_MPI if(Communicators::MpiCommunicator::IsInitialized())//i.e. - isUsed { - using HostVectorType = Containers::Vector<typename MeshFunctionType::RealType, Devices::Host, typename MeshFunctionType::IndexType >; + using HostVectorType = Containers::Vector<typename MeshFunctionType::RealType, Devices::Host, typename MeshFunctionType::IndexType >; HostVectorType hostVector; hostVector=meshFunction.getData(); - typename MeshFunctionType::RealType * data=hostVector.getData(); + typename MeshFunctionType::RealType * data=hostVector.getData(); return DistributedGridIO_MPIIOBase<MeshFunctionType>::save(fileName,meshFunction,data); } #endif @@ -455,12 +456,12 @@ class DistributedGridIO< return false; }; - static bool load(const String& fileName,MeshFunctionType &meshFunction) + static bool load(const String& fileName,MeshFunctionType &meshFunction) { #ifdef HAVE_MPI if(Communicators::MpiCommunicator::IsInitialized())//i.e. 
- isUsed { - using HostVectorType = Containers::Vector<typename MeshFunctionType::RealType, Devices::Host, typename MeshFunctionType::IndexType >; + using HostVectorType = Containers::Vector<typename MeshFunctionType::RealType, Devices::Host, typename MeshFunctionType::IndexType >; HostVectorType hostVector; hostVector.setLike(meshFunction.getData()); auto* data=hostVector.getData(); @@ -501,7 +502,7 @@ class DistributedGridIO< return false; }; - static bool load(const String& fileName,MeshFunctionType &meshFunction) + static bool load(const String& fileName,MeshFunctionType &meshFunction) { #ifdef HAVE_MPI if(Communicators::MpiCommunicator::IsInitialized())//i.e. - isUsed diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer.h index 5a11502403ca54be090c350b802e80d102926392..7bc17f9204a8a6b1ed13b301f0f3423d39daaa00 100644 --- a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer.h +++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer.h @@ -16,7 +16,6 @@ #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h> #include <TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h> #include <TNL/Meshes/DistributedMeshes/Directions.h> -#include <TNL/Communicators/MPIPrint.h> #include <TNL/Pointers/SharedPointer.h> namespace TNL { diff --git a/src/TNL/Solvers/Solver_impl.h b/src/TNL/Solvers/Solver_impl.h index 9182c620fe2ed589389ab5a64328917f392795c7..5c35c7c338d7f699e30bdea56079fa929cdbcb5a 100644 --- a/src/TNL/Solvers/Solver_impl.h +++ b/src/TNL/Solvers/Solver_impl.h @@ -16,11 +16,11 @@ #include <TNL/Config/parseCommandLine.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> namespace TNL { namespace Solvers { - + template< template< typename Real, typename Device, typename Index, typename MeshType, typename MeshConfig, typename SolverStarter, typename CommunicatorType > class ProblemSetter, template< typename MeshConfig > class ProblemConfig, typename MeshConfig > @@ -37,7 +37,7 @@ run( int argc, char* argv[] ) Devices::Cuda::configSetup( configDescription ); Communicators::MpiCommunicator::configSetup( configDescription ); - Communicators::ScopedInitializer< Communicators::MpiCommunicator > mpi( argc, argv ); + TNL::MPI::ScopedInitializer mpi( argc, argv ); if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) return false; diff --git a/src/Tools/tnl-game-of-life.cpp b/src/Tools/tnl-game-of-life.cpp index c33ae829439885aa0e69ed2efde610796d4d0ed2..a2d4f48e9a1895df82a88dde7785dd9a5959e6e3 100644 --- a/src/Tools/tnl-game-of-life.cpp +++ b/src/Tools/tnl-game-of-life.cpp @@ -18,7 +18,7 @@ #include <TNL/Meshes/Writers/VTUWriter.h> #include <TNL/Meshes/Writers/PVTUWriter.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> using namespace TNL; @@ -361,7 +361,7 @@ int main( int argc, char* argv[] ) configSetup( conf_desc ); - Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); if( ! 
parseCommandLine( argc, argv, conf_desc, parameters ) ) return EXIT_FAILURE; diff --git a/src/Tools/tnl-init.cpp b/src/Tools/tnl-init.cpp index 1a7769b5c89911018127c0fb4e105f3931f6a7a1..73765aafb982962c0e44d18565aecf136c2e7b3b 100644 --- a/src/Tools/tnl-init.cpp +++ b/src/Tools/tnl-init.cpp @@ -16,7 +16,7 @@ #include <TNL/Meshes/Grid.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> using namespace TNL; @@ -55,7 +55,7 @@ int main( int argc, char* argv[] ) setupConfig( configDescription ); Communicators::MpiCommunicator::configSetup( configDescription ); - Communicators::ScopedInitializer< Communicators::MpiCommunicator > mpi(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) return EXIT_FAILURE; diff --git a/src/Tools/tnl-test-distributed-mesh.h b/src/Tools/tnl-test-distributed-mesh.h index 0be53242b8122f4173a11d27181a73232067b125..1b8c59c75f89b981e02ba066cf5a61a27479bbcd 100644 --- a/src/Tools/tnl-test-distributed-mesh.h +++ b/src/Tools/tnl-test-distributed-mesh.h @@ -19,7 +19,7 @@ #include <TNL/Meshes/Writers/VTUWriter.h> #include <TNL/Meshes/Writers/PVTUWriter.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> using namespace TNL; @@ -431,7 +431,7 @@ int main( int argc, char* argv[] ) configSetup( conf_desc ); - Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) return EXIT_FAILURE; diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h index 113d1daa3ea02e947c6018115aa8cfef1bd6ae2a..366535cc7474392c6b16e0e4c3ab487ed1d0000f 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h @@ -10,7 +10,6 @@ #include <gtest/gtest.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> #include <TNL/Containers/DistributedNDArray.h> #include <TNL/Containers/DistributedNDArrayView.h> #include <TNL/Containers/DistributedNDArraySynchronizer.h> diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h index 145b0db5b49202ee26f7d89c5b99fb407e56d21c..aba9420f04f7b7eedaf3c50a47a6982c6adf32ab 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h @@ -10,7 +10,6 @@ #include <gtest/gtest.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> #include <TNL/Containers/DistributedNDArray.h> #include <TNL/Containers/DistributedNDArrayView.h> #include <TNL/Containers/DistributedNDArraySynchronizer.h> diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h index d80e467f5f886e2329df8800c43cc40358e123e1..3c637de4d2792d4bd3f7a6fd057d1d5e7316aa09 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h @@ -10,7 +10,6 @@ #include <gtest/gtest.h> #include 
<TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> #include <TNL/Containers/DistributedNDArray.h> #include <TNL/Containers/DistributedNDArrayView.h> #include <TNL/Containers/ArrayView.h> diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h index a072b2e8063c9444dc670ff83d866d093bde9804..93d6c3036bf513ee92561f6c09f98fcf10bb9c5b 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h @@ -10,7 +10,6 @@ #include <gtest/gtest.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> #include <TNL/Containers/DistributedNDArray.h> #include <TNL/Containers/DistributedNDArrayView.h> #include <TNL/Containers/ArrayView.h> diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h index 7decaf5752680df4b59ac86350c152d525000697..b778937b69c9752ea5f3a5cc04f2d1c44d7cd9d4 100644 --- a/src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h +++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h @@ -18,7 +18,6 @@ #include <TNL/Meshes/DistributedMeshes/distributeSubentities.h> #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/MPIPrint.h> #include <TNL/Functions/MeshFunction.h> #include <TNL/Meshes/Writers/PVTUWriter.h> #include <TNL/Meshes/Readers/PVTUReader.h> diff --git a/src/UnitTests/main_mpi.h b/src/UnitTests/main_mpi.h index 0f8f4b059a119fb42d5bbb78be01540a50cbdf9b..4c89b60ba446c5d2c305bf74f03add303911ebfc 100644 --- a/src/UnitTests/main_mpi.h +++ b/src/UnitTests/main_mpi.h @@ -7,7 +7,7 @@ #if (defined(HAVE_GTEST) && defined(HAVE_MPI)) #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/MPI/ScopedInitializer.h> using CommunicatorType = TNL::Communicators::MpiCommunicator; #include <sstream> @@ -58,7 +58,7 @@ int main( int argc, char* argv[] ) delete listeners.Release(listeners.default_result_printer()); listeners.Append(new MinimalistBufferedPrinter); - TNL::Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv); + TNL::MPI::ScopedInitializer mpi(argc, argv); #endif return RUN_ALL_TESTS(); #else
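
For reference, a minimal sketch of what a caller looks like after this refactoring. This is a hypothetical standalone program, not part of the patch; it assumes only the umbrella header <TNL/MPI.h> added above and the wrapper signatures from src/TNL/MPI/Wrappers.h, and omits any benchmark- or solver-specific setup.

#include <iostream>
#include <TNL/MPI.h>   // umbrella header added by this patch; pulls in Wrappers.h, ScopedInitializer.h, etc.

int main( int argc, char* argv[] )
{
   // RAII initializer: calls TNL::MPI::Init() now and restoreRedirection() + Finalize() on scope exit
   TNL::MPI::ScopedInitializer mpi( argc, argv );

   const int rank  = TNL::MPI::GetRank();   // returns 0 when compiled without HAVE_MPI
   const int nproc = TNL::MPI::GetSize();   // returns 1 when compiled without HAVE_MPI

   // each rank contributes one value; without HAVE_MPI, Allreduce falls back to a plain memcpy
   double local = rank + 1.0;
   double sum = 0.0;
   TNL::MPI::Allreduce( &local, &sum, 1, MPI_SUM, TNL::MPI::AllGroup() );

   if( rank == 0 )
      std::cout << "sum over " << nproc << " ranks = " << sum << std::endl;
   return 0;
}

The same pattern is what the benchmark and tool mains above switch to: the templated Communicators::ScopedInitializer< CommunicatorType > becomes the non-templated TNL::MPI::ScopedInitializer, and the static MpiCommunicator calls become free functions in the TNL::MPI namespace.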