Commit 2a13c0dc authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Added function distributeSubentities and tests for DistributedMeshSynchronizer on faces

Closes #73.
parent 3c662842
Loading
Loading
Loading
Loading
+137 −0
Original line number Diff line number Diff line
@@ -220,6 +220,143 @@ public:
      CommunicatorType::WaitAll( requests.data(), requests.size() );
   }

   // performs a synchronization of a sparse matrix
   //    - row indices -- ghost indices (recv), ghost neighbors (send)
   //    - column indices -- values to be synchronized (in a CSR-like format)
   // returns: a tuple of the received rankOffsets, rowPointers and columnIndices arrays
   template< typename SparsePattern >
   auto
   synchronizeSparse( const SparsePattern& pattern )
   {
      TNL_ASSERT_EQ( pattern.getRows(), ghostOffsets[ ghostOffsets.getSize() - 1 ], "invalid sparse pattern matrix" );

      const int rank = CommunicatorType::GetRank( group );
      const int nproc = CommunicatorType::GetSize( group );

      // buffer for asynchronous communication requests
      std::vector< typename CommunicatorType::Request > requests;

      Containers::Array< GlobalIndexType, Devices::Host, int > send_rankOffsets( nproc + 1 ), recv_rankOffsets( nproc + 1 );
      Containers::Array< GlobalIndexType, Devices::Host, GlobalIndexType > send_rowPointers, send_columnIndices, recv_rowPointers, recv_columnIndices;

      // sending part
      {
         // set rank offsets
         send_rankOffsets[ 0 ] = 0;
         for( int i = 0; i < nproc; i++ )
            send_rankOffsets[ i + 1 ] = send_rankOffsets[ i ] + ghostNeighborOffsets[ i + 1 ] - ghostNeighborOffsets[ i ];

         // allocate row pointers
         send_rowPointers.setSize( send_rankOffsets[ nproc ] + 1 );

         // set row pointers
         GlobalIndexType rowPtr = 0;
         send_rowPointers[ rowPtr ] = 0;
         for( int i = 0; i < nproc; i++ ) {
            if( i == rank )
               continue;
            for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) {
               const GlobalIndexType rowIdx = ghostNeighbors[ r ];
               const auto row = pattern.getRow( rowIdx );
               send_rowPointers[ rowPtr + 1 ] = send_rowPointers[ rowPtr ] + row.getSize();
               rowPtr++;
            }
         }

         // allocate column indices
         send_columnIndices.setSize( send_rowPointers[ send_rowPointers.getSize() - 1 ] );

         // set column indices
         for( int i = 0; i < nproc; i++ ) {
            if( i == rank )
               continue;
            const GlobalIndexType rankOffset = send_rankOffsets[ i ];
            GlobalIndexType rowBegin = send_rowPointers[ rankOffset ];
            for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) {
               const GlobalIndexType rowIdx = ghostNeighbors[ r ];
               const auto row = pattern.getRow( rowIdx );
               for( GlobalIndexType c = 0; c < row.getSize(); c++ )
                  send_columnIndices[ rowBegin + c ] = row.getColumnIndex( c );
               rowBegin += row.getSize();
            }
         }

         for( int i = 0; i < nproc; i++ ) {
            if( send_rankOffsets[ i + 1 ] == send_rankOffsets[ i ] )
               continue;
            // issue async send operation
            requests.push_back( CommunicatorType::ISend(
                     send_columnIndices.getData() + send_rowPointers[ send_rankOffsets[ i ] ],
                     send_rowPointers[ send_rankOffsets[ i + 1 ] ] - send_rowPointers[ send_rankOffsets[ i ] ],
                     i, 0, group ) );
         }
      }

      // receiving part
      {
         // set rank offsets
         recv_rankOffsets[ 0 ] = 0;
         for( int i = 0; i < nproc; i++ )
            recv_rankOffsets[ i + 1 ] = recv_rankOffsets[ i ] + ghostOffsets[ i + 1 ] - ghostOffsets[ i ];

         // allocate row pointers
         recv_rowPointers.setSize( recv_rankOffsets[ nproc ] + 1 );

         // set row pointers
         GlobalIndexType rowPtr = 0;
         recv_rowPointers[ rowPtr ] = 0;
         for( int i = 0; i < nproc; i++ ) {
            if( i == rank )
               continue;
            for( GlobalIndexType rowIdx = ghostOffsets[ i ]; rowIdx < ghostOffsets[ i + 1 ]; rowIdx++ ) {
               const auto row = pattern.getRow( rowIdx );
               recv_rowPointers[ rowPtr + 1 ] = recv_rowPointers[ rowPtr ] + row.getSize();
               rowPtr++;
            }
         }

         // allocate column indices
         recv_columnIndices.setSize( recv_rowPointers[ recv_rowPointers.getSize() - 1 ] );

         for( int i = 0; i < nproc; i++ ) {
            if( recv_rankOffsets[ i + 1 ] == recv_rankOffsets[ i ] )
               continue;
            // issue async send operation
            requests.push_back( CommunicatorType::IRecv(
                     recv_columnIndices.getData() + recv_rowPointers[ recv_rankOffsets[ i ] ],
                     recv_rowPointers[ recv_rankOffsets[ i + 1 ] ] - recv_rowPointers[ recv_rankOffsets[ i ] ],
                     i, 0, group ) );
         }
      }

      // wait for all communications to finish
      CommunicatorType::WaitAll( requests.data(), requests.size() );

      return std::make_tuple( recv_rankOffsets, recv_rowPointers, recv_columnIndices );
   }

   // public const accessors for the communication pattern matrix and index arrays which were
   // created in the `initialize` method
   const auto& getGhostEntitiesCounts() const
   {
      return ghostEntitiesCounts;
   }

   const auto& getGhostOffsets() const
   {
      return ghostOffsets;
   }

   const auto& getGhostNeighborOffsets() const
   {
      return ghostNeighborOffsets;
   }

   const auto& getGhostNeighbors() const
   {
      return ghostNeighbors;
   }

protected:
   // GOTCHA (see above)
   int gpu_id = 0;
+195 −0
Original line number Diff line number Diff line
/***************************************************************************
                          distributeSubentities.h  -  description
                             -------------------
    begin                : July 13, 2020
    copyright            : (C) 2020 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

// Implemented by: Jakub Klinkovský

#pragma once

#include <numeric>   // std::iota

#include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h>
#include <TNL/Meshes/MeshDetails/layers/EntityTags/Traits.h>

#include <TNL/Meshes/Geometry/getEntityCenter.h>

namespace TNL {
namespace Meshes {
namespace DistributedMeshes {

template< int Dimension, typename DistributedMesh >
void
distributeSubentities( DistributedMesh& mesh )
{
   using DeviceType = typename DistributedMesh::DeviceType;
   using GlobalIndexType = typename DistributedMesh::GlobalIndexType;
   using LocalIndexType = typename DistributedMesh::LocalIndexType;
   using LocalMesh = typename DistributedMesh::MeshType;
   using CommunicatorType = typename DistributedMesh::CommunicatorType;

   static_assert( ! std::is_same< DeviceType, Devices::Cuda >::value,
                  "this method can be called only for host meshes" );
   static_assert( 0 < Dimension && Dimension < DistributedMesh::getMeshDimension(),
                  "Vertices and cells cannot be distributed using this method." );
   if( mesh.getGhostLevels() <= 0 )
      throw std::logic_error( "There are no ghost levels on the distributed mesh." );

   const int rank = CommunicatorType::GetRank( mesh.getCommunicationGroup() );
   const int nproc = CommunicatorType::GetSize( mesh.getCommunicationGroup() );

   // exchange the global vertex index offsets so that each rank can determine the
   // owner of every vertex by its global index
   const GlobalIndexType ownVertexStart = mesh.template getGlobalIndices< 0 >().getElement( 0 );
   Containers::Array< GlobalIndexType, Devices::Host, int > vertexOffsets( nproc );
   {
      Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc );
      sendbuf.setValue( ownVertexStart );
      CommunicatorType::Alltoall( sendbuf.getData(), 1,
                                  vertexOffsets.getData(), 1,
                                  mesh.getCommunicationGroup() );
   }

   auto getVertexOwner = [&] ( GlobalIndexType local_idx ) -> int
   {
      const GlobalIndexType global_idx = mesh.template getGlobalIndices< 0 >()[ local_idx ];
      for( int i = 0; i < nproc - 1; i++ )
         if( vertexOffsets[ i ] <= global_idx && global_idx < vertexOffsets[ i + 1 ] )
            return i;
      return nproc - 1;
   };

   auto getEntityOwner = [&] ( GlobalIndexType local_idx ) -> int
   {
      auto entity = mesh.getLocalMesh().template getEntity< Dimension >( local_idx );
      int owner = 0;
      for( LocalIndexType v = 0; v < entity.template getSubentitiesCount< 0 >(); v++ ) {
         const GlobalIndexType gv = entity.template getSubentityIndex< 0 >( v );
         owner = TNL::max( owner, getVertexOwner( gv ) );
      }
      return owner;
   };

   // 1. identify local entities, set the GhostEntity tag on others
   LocalMesh& localMesh = mesh.getLocalMesh();
   localMesh.template forAll< Dimension >( [&] ( GlobalIndexType i ) mutable {
      if( getEntityOwner( i ) != rank )
         localMesh.template addEntityTag< Dimension >( i, EntityTags::GhostEntity );
   });

   // 2. reorder the entities to make sure that all ghost entities are after local entities
   // TODO: it would be nice if the mesh initializer could do this
   {
      // count local entities
      GlobalIndexType localEntitiesCount = 0;
      for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ )
         if( ! localMesh.template isGhostEntity< Dimension >( i ) )
            ++localEntitiesCount;

      // create the permutation
      typename LocalMesh::GlobalIndexArray perm, iperm;
      perm.setSize( localMesh.template getEntitiesCount< Dimension >() );
      iperm.setSize( localMesh.template getEntitiesCount< Dimension >() );
      GlobalIndexType localsCount = 0;
      GlobalIndexType ghostsCount = 0;
      for( GlobalIndexType j = 0; j < iperm.getSize(); j++ ) {
         if( localMesh.template isGhostEntity< Dimension >( j ) ) {
            iperm[ j ] = localEntitiesCount + ghostsCount;
            perm[ localEntitiesCount + ghostsCount ] = j;
            ++ghostsCount;
         }
         else {
            iperm[ j ] = localsCount;
            perm[ localsCount ] = j;
            ++localsCount;
         }
      }

      // reorder the local mesh
      localMesh.template reorderEntities< Dimension >( perm, iperm );
   }

   // 3. update entity tags layer (this is actually done as part of the mesh reordering)
   //localMesh.template updateEntityTagsLayer< Dimension >();

   // 4. exchange the counts of local entities between ranks, compute offsets for global indices
   Containers::Vector< GlobalIndexType, Devices::Host, int > globalOffsets( nproc );
   {
      Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc );
      sendbuf.setValue( localMesh.template getGhostEntitiesOffset< Dimension >() );
      CommunicatorType::Alltoall( sendbuf.getData(), 1,
                                  globalOffsets.getData(), 1,
                                  mesh.getCommunicationGroup() );
   }
   globalOffsets.template scan< Algorithms::ScanType::Exclusive >();

   // 5. assign global indices to the local entities
   mesh.template getGlobalIndices< Dimension >().setSize( localMesh.template getEntitiesCount< Dimension >() );
   localMesh.template forLocal< Dimension >( [&] ( GlobalIndexType i ) mutable {
      mesh.template getGlobalIndices< Dimension >()[ i ] = globalOffsets[ rank ] + i;
   });

   // 6. exchange cell data to prepare the communication pattern
   DistributedMeshSynchronizer< DistributedMesh > synchronizer;
   synchronizer.initialize( mesh );

   // 7. exchange local indices for ghost entities
   const auto sparseResult = synchronizer.synchronizeSparse( localMesh.template getSubentitiesMatrix< DistributedMesh::getMeshDimension(), Dimension >() );
   const auto& rankOffsets = std::get< 0 >( sparseResult );
   const auto& rowPointers = std::get< 1 >( sparseResult );
   const auto& columnIndices = std::get< 2 >( sparseResult );

   // 8. set the global indices of our ghost entities
   for( int i = 0; i < nproc; i++ ) {
      if( i == rank )
         continue;
      for( GlobalIndexType cell = synchronizer.getGhostOffsets()[ i ]; cell < synchronizer.getGhostOffsets()[ i + 1 ]; cell++ ) {
         for( LocalIndexType e = 0; e < mesh.getLocalMesh().template getSubentitiesCount< DistributedMesh::getMeshDimension(), Dimension >( cell ); e++ ) {
            const GlobalIndexType entityIndex = mesh.getLocalMesh().template getSubentityIndex< DistributedMesh::getMeshDimension(), Dimension >( cell, e );
            const int owner = getEntityOwner( entityIndex );
            // pick the right owner as we might have received an index from multiple ranks
            if( owner == i ) {
               const GlobalIndexType ghostOffset = cell - synchronizer.getGhostOffsets()[ owner ];
               // global index = owner's local index + owner's offset
               const GlobalIndexType globalEntityIndex = columnIndices[ rowPointers[ rankOffsets[ owner ] + ghostOffset ] + e ] + globalOffsets[ owner ];
               mesh.template getGlobalIndices< Dimension >()[ entityIndex ] = globalEntityIndex;
            }
         }
      }
   }

   // 9. reorder the entities to make sure that global indices are sorted
   {
      // prepare vector with an identity permutation
      std::vector< GlobalIndexType > permutation( localMesh.template getEntitiesCount< Dimension >() );
      std::iota( permutation.begin(), permutation.end(), (GlobalIndexType) 0 );

      // sort the subarray corresponding to ghost entities by the global index
      std::stable_sort( permutation.begin() + mesh.getLocalMesh().template getGhostEntitiesOffset< Dimension >(),
                        permutation.end(),
                        [&mesh](auto& left, auto& right) {
         return mesh.template getGlobalIndices< Dimension >()[ left ] < mesh.template getGlobalIndices< Dimension >()[ right ];
      });

      // copy the permutation into TNL array and invert
      typename LocalMesh::GlobalIndexArray perm, iperm;
      perm.setSize( localMesh.template getEntitiesCount< Dimension >() );
      iperm.setSize( localMesh.template getEntitiesCount< Dimension >() );
      for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ ) {
         perm[ i ] = permutation[ i ];
         iperm[ perm[ i ] ] = i;
      }

      // reorder the mesh
      mesh.template reorderEntities< Dimension >( perm, iperm );
   }
}

} // namespace DistributedMeshes
} // namespace Meshes
} // namespace TNL
+0 −2
Original line number Diff line number Diff line
@@ -261,8 +261,6 @@ class Mesh
      // Methods for the mesh initializer
      using StorageBaseType::getPoints;
      using StorageBaseType::setEntitiesCount;
      using StorageBaseType::getSubentitiesMatrix;
      using StorageBaseType::getSuperentitiesMatrix;

      friend Initializer< MeshConfig >;

+14 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <TNL/Meshes/DefaultConfig.h>
#include <TNL/Meshes/VTKTraits.h>
#include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
#include <TNL/Meshes/DistributedMeshes/distributeSubentities.h>
#include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h>
#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Communicators/MPIPrint.h>
@@ -263,6 +264,11 @@ struct GridDistributor< TNL::Meshes::Grid< 2, Real, Device, Index > >

      // set the communication group
      mesh.setCommunicationGroup( group );

      if( overlap > 0 ) {
         // distribute faces
         distributeSubentities< 1 >( mesh );
      }
   }

   static std::map< Index, Index > renumberVertices( const GridType& grid, CoordinatesType rank_sizes )
@@ -607,9 +613,13 @@ void testSynchronizer( const Mesh& mesh )
{
   testSynchronizerOnDevice< Devices::Host, typename Mesh::Cell >( mesh );
   testSynchronizerOnDevice< Devices::Host, typename Mesh::Vertex >( mesh );
   if( mesh.template getGlobalIndices< 1 >().getSize() > 0 )
      testSynchronizerOnDevice< Devices::Host, typename Mesh::Face >( mesh );
#ifdef HAVE_CUDA
   testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Cell >( mesh );
   testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Vertex >( mesh );
   if( mesh.template getGlobalIndices< 1 >().getSize() > 0 )
      testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Face >( mesh );
#endif
}

@@ -721,6 +731,10 @@ TEST( DistributedMeshTest, PVTUWriterReader )
   EXPECT_EQ( reader.getMeshType(), "Meshes::DistributedMesh" );
   Mesh loadedMesh;
   reader.loadMesh( loadedMesh );
   // decomposition of faces is not stored in the VTK files
   if( mesh.getGhostLevels() > 0 ) {
      distributeSubentities< 1 >( loadedMesh );
   }
   EXPECT_EQ( loadedMesh, mesh );

   // cleanup