Commit 52f8128a authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Added functionality for attaching GDB to MPI processes.

parent a00dd0a5
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
#ADD_SUBDIRECTORY( Python )
ADD_SUBDIRECTORY( Python )
ADD_SUBDIRECTORY( TNL )
ADD_SUBDIRECTORY( Tools )
ADD_SUBDIRECTORY( UnitTests )
+67 −26
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@

#ifdef HAVE_MPI
#include <mpi.h>
#include <mpi-ext.h>

#ifdef HAVE_CUDA
    #include <TNL/Devices/Cuda.h>
@@ -72,6 +73,8 @@ class MpiCommunicator
      {
#ifdef HAVE_MPI
         config.addEntry< bool >( "redirect-mpi-output", "Only process with rank 0 prints to console. Other processes are redirected to files.", true );
         config.addEntry< bool >( "mpi-gdb-debug", "Wait for GDB to attach the master MPI process.", false );
         config.addEntry< int >( "mpi-process-to-attach", "Number of the MPI process to be attached by GDB.", 0 );
#endif
      }

@@ -83,8 +86,47 @@ class MpiCommunicator
         {
            redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
            setupRedirection();                        
         }
#ifdef HAVE_CUDA
   #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
            std::cout << "CUDA-aware MPI detected on this system ... " << std::endl;
   #elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT
            std::cerr << "MPI is not CUDA-aware. Please install correct version of MPI." << std::endl;
            return false;
   #else
            std::cerr << "WARNING: TNL cannot detect if you have CUDA-aware MPI. Some problems may occur." << std::endl;
   #endif
#endif // HAVE_CUDA
            bool gdbDebug = parameters.getParameter< bool >( "mpi-gdb-debug" );
            int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );            
    
            if( gdbDebug )
            {
               int rank = GetRank( MPI_COMM_WORLD );
               int pid = getpid();
                              
               volatile int tnlMPIDebugAttached = 0;
               MPI_Send( &pid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD );
               if( rank == 0 )
                  std::cerr << "Attach GDB to MPI process(es) by entering:" << std::endl;
               for( int i = 0; i < GetSize( MPI_COMM_WORLD ); i++ )
               {
                  MPI_Status status;
                  int recvPid;
                  MPI_Recv( &recvPid, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status );
               
                  if( i == processToAttach || processToAttach == -1 )
                  {
                     std::cerr << "  For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\"" 
                               << " -ex \"set variable tnlMPIDebugAttached=1\"" 
                               << " -ex \"finish\"" << std::endl;
                  }
               }
               if( rank == processToAttach || processToAttach == -1 )
                  while( ! tnlMPIDebugAttached );
               MPI_Barrier( MPI_COMM_WORLD );
            }
         }
#endif // HAVE_MPI
         return true;
      }

@@ -188,9 +230,8 @@ class MpiCommunicator
           int sum=0;
           for(int i=0;i<dim;i++)
                sum+=distr[i];
           if(sum==0) //uživatel neovlivňuje distribuci
           if(sum==0)
           {
               std::cout << "vynucuji distribuci" <<std::endl;
               for(int i=0;i<dim-1;i++)
               {
                    distr[i]=1;
+11 −11
Original line number Diff line number Diff line
@@ -203,7 +203,7 @@ parseCommandLine( int argc, char* argv[],
         Containers::List< String > parsedEntryType;
         if( ! parseObjectType( entryType, parsedEntryType ) )
         {
            std::cerr << "Internal error: Uknown config entry type " << entryType << "." << std::endl;
            std::cerr << "Internal error: Unknown config entry type " << entryType << "." << std::endl;
            return false;
         }
         if( parsedEntryType[ 0 ] == "List" )
@@ -308,7 +308,7 @@ parseCommandLine( int argc, char* argv[],
            }
            if( parsedEntryType[ 0 ] == "int" )
            {
               /*if( ! isdigit( value ) )
               /*if( ! std::isdigit( value ) ) //TODO: Check for real number
               {
                  std::cerr << "Integer constant is required for the parameter " << option << "." << std::endl;
                  parse_error = true;
@@ -320,7 +320,7 @@ parseCommandLine( int argc, char* argv[],
            }
            if( parsedEntryType[ 0 ] == "double" )
            {
               /*if( ! isdigit( value ) )
               /*if( ! std::isdigit( value ) )  //TODO: Check for real number
               {
                  std::cerr << "Real constant is required for the parameter " << option << "." << std::endl;
                  parse_error = true;
+0 −2
Original line number Diff line number Diff line
@@ -171,8 +171,6 @@ void IterativeSolverMonitor< Real, Index > :: refresh()
      if( nodesPerIteration ) // otherwise MLUPS: 0 is printed
      {
         const RealType mlups = nodesPerIteration * (iterations - iterations_before_refresh) / (getElapsedTime() - elapsed_time_before_refresh) * 1e-6;
         //std::cerr << std::endl << " iterations - iterations_before_refresh = " << iterations - iterations_before_refresh
         //          << " getElapsedTime() - elapsed_time_before_refresh = " << getElapsedTime() - elapsed_time_before_refresh << std::endl;
         print_item( " MLUPS:", 0 );
         if( mlups > 0 )
         {
+6 −1
Original line number Diff line number Diff line
@@ -107,7 +107,12 @@ bool IterativeSolver< Real, Index> :: checkNextIteration()
{
   this->refreshSolverMonitor();

   if( std::isnan( this->getResidue() ) ||
   if(
#ifndef HAVE_CUDA      
      std::isnan( this->getResidue() ) ||
      // TODO: Fix this !!!!
      // this does not work (at least) with nvcc 8.0 and g++ 5.4
#endif      
       this->getIterations() > this->getMaxIterations()  ||
       ( this->getResidue() > this->getDivergenceResidue() && this->getIterations() >= this->getMinIterations() ) ||
       ( this->getResidue() < this->getConvergenceResidue() && this->getIterations() >= this->getMinIterations() ) )
+1 −1

File changed.

Contains only whitespace changes.

Loading