Loading src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h +137 −0 Original line number Diff line number Diff line Loading @@ -220,6 +220,143 @@ public: CommunicatorType::WaitAll( requests.data(), requests.size() ); } // performs a synchronization of a sparse matrix // - row indices -- ghost indices (recv), ghost neighbors (send) // - column indices -- values to be synchronized (in a CSR-like format) // returns: a tuple of the received rankOffsets, rowPointers and columnIndices arrays template< typename SparsePattern > auto synchronizeSparse( const SparsePattern& pattern ) { TNL_ASSERT_EQ( pattern.getRows(), ghostOffsets[ ghostOffsets.getSize() - 1 ], "invalid sparse pattern matrix" ); const int rank = CommunicatorType::GetRank( group ); const int nproc = CommunicatorType::GetSize( group ); // buffer for asynchronous communication requests std::vector< typename CommunicatorType::Request > requests; Containers::Array< GlobalIndexType, Devices::Host, int > send_rankOffsets( nproc + 1 ), recv_rankOffsets( nproc + 1 ); Containers::Array< GlobalIndexType, Devices::Host, GlobalIndexType > send_rowPointers, send_columnIndices, recv_rowPointers, recv_columnIndices; // sending part { // set rank offsets send_rankOffsets[ 0 ] = 0; for( int i = 0; i < nproc; i++ ) send_rankOffsets[ i + 1 ] = send_rankOffsets[ i ] + ghostNeighborOffsets[ i + 1 ] - ghostNeighborOffsets[ i ]; // allocate row pointers send_rowPointers.setSize( send_rankOffsets[ nproc ] + 1 ); // set row pointers GlobalIndexType rowPtr = 0; send_rowPointers[ rowPtr ] = 0; for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) { const GlobalIndexType rowIdx = ghostNeighbors[ r ]; const auto row = pattern.getRow( rowIdx ); send_rowPointers[ rowPtr + 1 ] = send_rowPointers[ rowPtr ] + row.getSize(); rowPtr++; } } // allocate column indices send_columnIndices.setSize( send_rowPointers[ send_rowPointers.getSize() - 1 ] ); // set column indices for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; const GlobalIndexType rankOffset = send_rankOffsets[ i ]; GlobalIndexType rowBegin = send_rowPointers[ rankOffset ]; for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) { const GlobalIndexType rowIdx = ghostNeighbors[ r ]; const auto row = pattern.getRow( rowIdx ); for( GlobalIndexType c = 0; c < row.getSize(); c++ ) send_columnIndices[ rowBegin + c ] = row.getColumnIndex( c ); rowBegin += row.getSize(); } } for( int i = 0; i < nproc; i++ ) { if( send_rankOffsets[ i + 1 ] == send_rankOffsets[ i ] ) continue; // issue async send operation requests.push_back( CommunicatorType::ISend( send_columnIndices.getData() + send_rowPointers[ send_rankOffsets[ i ] ], send_rowPointers[ send_rankOffsets[ i + 1 ] ] - send_rowPointers[ send_rankOffsets[ i ] ], i, 0, group ) ); } } // receiving part { // set rank offsets recv_rankOffsets[ 0 ] = 0; for( int i = 0; i < nproc; i++ ) recv_rankOffsets[ i + 1 ] = recv_rankOffsets[ i ] + ghostOffsets[ i + 1 ] - ghostOffsets[ i ]; // allocate row pointers recv_rowPointers.setSize( recv_rankOffsets[ nproc ] + 1 ); // set row pointers GlobalIndexType rowPtr = 0; recv_rowPointers[ rowPtr ] = 0; for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; for( GlobalIndexType rowIdx = ghostOffsets[ i ]; rowIdx < ghostOffsets[ i + 1 ]; rowIdx++ ) { const auto row = pattern.getRow( rowIdx ); recv_rowPointers[ rowPtr + 1 ] = recv_rowPointers[ rowPtr ] + row.getSize(); rowPtr++; } } // allocate column indices recv_columnIndices.setSize( recv_rowPointers[ recv_rowPointers.getSize() - 1 ] ); for( int i = 0; i < nproc; i++ ) { if( recv_rankOffsets[ i + 1 ] == recv_rankOffsets[ i ] ) continue; // issue async send operation requests.push_back( CommunicatorType::IRecv( recv_columnIndices.getData() + recv_rowPointers[ recv_rankOffsets[ i ] ], recv_rowPointers[ recv_rankOffsets[ i + 1 ] ] - recv_rowPointers[ recv_rankOffsets[ i ] ], i, 0, group ) ); } } // wait for all communications to finish CommunicatorType::WaitAll( requests.data(), requests.size() ); return std::make_tuple( recv_rankOffsets, recv_rowPointers, recv_columnIndices ); } // public const accessors for the communication pattern matrix and index arrays which were // created in the `initialize` method const auto& getGhostEntitiesCounts() const { return ghostEntitiesCounts; } const auto& getGhostOffsets() const { return ghostOffsets; } const auto& getGhostNeighborOffsets() const { return ghostNeighborOffsets; } const auto& getGhostNeighbors() const { return ghostNeighbors; } protected: // GOTCHA (see above) int gpu_id = 0; Loading src/TNL/Meshes/DistributedMeshes/distributeSubentities.h 0 → 100644 +195 −0 Original line number Diff line number Diff line /*************************************************************************** distributeSubentities.h - description ------------------- begin : July 13, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ // Implemented by: Jakub Klinkovský #pragma once #include <numeric> // std::iota #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h> #include <TNL/Meshes/MeshDetails/layers/EntityTags/Traits.h> #include <TNL/Meshes/Geometry/getEntityCenter.h> namespace TNL { namespace Meshes { namespace DistributedMeshes { template< int Dimension, typename DistributedMesh > void distributeSubentities( DistributedMesh& mesh ) { using DeviceType = typename DistributedMesh::DeviceType; using GlobalIndexType = typename DistributedMesh::GlobalIndexType; using LocalIndexType = typename DistributedMesh::LocalIndexType; using LocalMesh = typename DistributedMesh::MeshType; using CommunicatorType = typename DistributedMesh::CommunicatorType; static_assert( ! std::is_same< DeviceType, Devices::Cuda >::value, "this method can be called only for host meshes" ); static_assert( 0 < Dimension && Dimension < DistributedMesh::getMeshDimension(), "Vertices and cells cannot be distributed using this method." ); if( mesh.getGhostLevels() <= 0 ) throw std::logic_error( "There are no ghost levels on the distributed mesh." ); const int rank = CommunicatorType::GetRank( mesh.getCommunicationGroup() ); const int nproc = CommunicatorType::GetSize( mesh.getCommunicationGroup() ); // exchange the global vertex index offsets so that each rank can determine the // owner of every vertex by its global index const GlobalIndexType ownVertexStart = mesh.template getGlobalIndices< 0 >().getElement( 0 ); Containers::Array< GlobalIndexType, Devices::Host, int > vertexOffsets( nproc ); { Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc ); sendbuf.setValue( ownVertexStart ); CommunicatorType::Alltoall( sendbuf.getData(), 1, vertexOffsets.getData(), 1, mesh.getCommunicationGroup() ); } auto getVertexOwner = [&] ( GlobalIndexType local_idx ) -> int { const GlobalIndexType global_idx = mesh.template getGlobalIndices< 0 >()[ local_idx ]; for( int i = 0; i < nproc - 1; i++ ) if( vertexOffsets[ i ] <= global_idx && global_idx < vertexOffsets[ i + 1 ] ) return i; return nproc - 1; }; auto getEntityOwner = [&] ( GlobalIndexType local_idx ) -> int { auto entity = mesh.getLocalMesh().template getEntity< Dimension >( local_idx ); int owner = 0; for( LocalIndexType v = 0; v < entity.template getSubentitiesCount< 0 >(); v++ ) { const GlobalIndexType gv = entity.template getSubentityIndex< 0 >( v ); owner = TNL::max( owner, getVertexOwner( gv ) ); } return owner; }; // 1. identify local entities, set the GhostEntity tag on others LocalMesh& localMesh = mesh.getLocalMesh(); localMesh.template forAll< Dimension >( [&] ( GlobalIndexType i ) mutable { if( getEntityOwner( i ) != rank ) localMesh.template addEntityTag< Dimension >( i, EntityTags::GhostEntity ); }); // 2. reorder the entities to make sure that all ghost entities are after local entities // TODO: it would be nice if the mesh initializer could do this { // count local entities GlobalIndexType localEntitiesCount = 0; for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ ) if( ! localMesh.template isGhostEntity< Dimension >( i ) ) ++localEntitiesCount; // create the permutation typename LocalMesh::GlobalIndexArray perm, iperm; perm.setSize( localMesh.template getEntitiesCount< Dimension >() ); iperm.setSize( localMesh.template getEntitiesCount< Dimension >() ); GlobalIndexType localsCount = 0; GlobalIndexType ghostsCount = 0; for( GlobalIndexType j = 0; j < iperm.getSize(); j++ ) { if( localMesh.template isGhostEntity< Dimension >( j ) ) { iperm[ j ] = localEntitiesCount + ghostsCount; perm[ localEntitiesCount + ghostsCount ] = j; ++ghostsCount; } else { iperm[ j ] = localsCount; perm[ localsCount ] = j; ++localsCount; } } // reorder the local mesh localMesh.template reorderEntities< Dimension >( perm, iperm ); } // 3. update entity tags layer (this is actually done as part of the mesh reordering) //localMesh.template updateEntityTagsLayer< Dimension >(); // 4. exchange the counts of local entities between ranks, compute offsets for global indices Containers::Vector< GlobalIndexType, Devices::Host, int > globalOffsets( nproc ); { Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc ); sendbuf.setValue( localMesh.template getGhostEntitiesOffset< Dimension >() ); CommunicatorType::Alltoall( sendbuf.getData(), 1, globalOffsets.getData(), 1, mesh.getCommunicationGroup() ); } globalOffsets.template scan< Algorithms::ScanType::Exclusive >(); // 5. assign global indices to the local entities mesh.template getGlobalIndices< Dimension >().setSize( localMesh.template getEntitiesCount< Dimension >() ); localMesh.template forLocal< Dimension >( [&] ( GlobalIndexType i ) mutable { mesh.template getGlobalIndices< Dimension >()[ i ] = globalOffsets[ rank ] + i; }); // 6. exchange cell data to prepare the communication pattern DistributedMeshSynchronizer< DistributedMesh > synchronizer; synchronizer.initialize( mesh ); // 7. exchange local indices for ghost entities const auto sparseResult = synchronizer.synchronizeSparse( localMesh.template getSubentitiesMatrix< DistributedMesh::getMeshDimension(), Dimension >() ); const auto& rankOffsets = std::get< 0 >( sparseResult ); const auto& rowPointers = std::get< 1 >( sparseResult ); const auto& columnIndices = std::get< 2 >( sparseResult ); // 8. set the global indices of our ghost entities for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; for( GlobalIndexType cell = synchronizer.getGhostOffsets()[ i ]; cell < synchronizer.getGhostOffsets()[ i + 1 ]; cell++ ) { for( LocalIndexType e = 0; e < mesh.getLocalMesh().template getSubentitiesCount< DistributedMesh::getMeshDimension(), Dimension >( cell ); e++ ) { const GlobalIndexType entityIndex = mesh.getLocalMesh().template getSubentityIndex< DistributedMesh::getMeshDimension(), Dimension >( cell, e ); const int owner = getEntityOwner( entityIndex ); // pick the right owner as we might have received an index from multiple ranks if( owner == i ) { const GlobalIndexType ghostOffset = cell - synchronizer.getGhostOffsets()[ owner ]; // global index = owner's local index + owner's offset const GlobalIndexType globalEntityIndex = columnIndices[ rowPointers[ rankOffsets[ owner ] + ghostOffset ] + e ] + globalOffsets[ owner ]; mesh.template getGlobalIndices< Dimension >()[ entityIndex ] = globalEntityIndex; } } } } // 9. reorder the entities to make sure that global indices are sorted { // prepare vector with an identity permutation std::vector< GlobalIndexType > permutation( localMesh.template getEntitiesCount< Dimension >() ); std::iota( permutation.begin(), permutation.end(), (GlobalIndexType) 0 ); // sort the subarray corresponding to ghost entities by the global index std::stable_sort( permutation.begin() + mesh.getLocalMesh().template getGhostEntitiesOffset< Dimension >(), permutation.end(), [&mesh](auto& left, auto& right) { return mesh.template getGlobalIndices< Dimension >()[ left ] < mesh.template getGlobalIndices< Dimension >()[ right ]; }); // copy the permutation into TNL array and invert typename LocalMesh::GlobalIndexArray perm, iperm; perm.setSize( localMesh.template getEntitiesCount< Dimension >() ); iperm.setSize( localMesh.template getEntitiesCount< Dimension >() ); for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ ) { perm[ i ] = permutation[ i ]; iperm[ perm[ i ] ] = i; } // reorder the mesh mesh.template reorderEntities< Dimension >( perm, iperm ); } } } // namespace DistributedMeshes } // namespace Meshes } // namespace TNL src/TNL/Meshes/Mesh.h +0 −2 Original line number Diff line number Diff line Loading @@ -261,8 +261,6 @@ class Mesh // Methods for the mesh initializer using StorageBaseType::getPoints; using StorageBaseType::setEntitiesCount; using StorageBaseType::getSubentitiesMatrix; using StorageBaseType::getSuperentitiesMatrix; friend Initializer< MeshConfig >; Loading src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h +14 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ #include <TNL/Meshes/DefaultConfig.h> #include <TNL/Meshes/VTKTraits.h> #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h> #include <TNL/Meshes/DistributedMeshes/distributeSubentities.h> #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/MPIPrint.h> Loading Loading @@ -263,6 +264,11 @@ struct GridDistributor< TNL::Meshes::Grid< 2, Real, Device, Index > > // set the communication group mesh.setCommunicationGroup( group ); if( overlap > 0 ) { // distribute faces distributeSubentities< 1 >( mesh ); } } static std::map< Index, Index > renumberVertices( const GridType& grid, CoordinatesType rank_sizes ) Loading Loading @@ -607,9 +613,13 @@ void testSynchronizer( const Mesh& mesh ) { testSynchronizerOnDevice< Devices::Host, typename Mesh::Cell >( mesh ); testSynchronizerOnDevice< Devices::Host, typename Mesh::Vertex >( mesh ); if( mesh.template getGlobalIndices< 1 >().getSize() > 0 ) testSynchronizerOnDevice< Devices::Host, typename Mesh::Face >( mesh ); #ifdef HAVE_CUDA testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Cell >( mesh ); testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Vertex >( mesh ); if( mesh.template getGlobalIndices< 1 >().getSize() > 0 ) testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Face >( mesh ); #endif } Loading Loading @@ -721,6 +731,10 @@ TEST( DistributedMeshTest, PVTUWriterReader ) EXPECT_EQ( reader.getMeshType(), "Meshes::DistributedMesh" ); Mesh loadedMesh; reader.loadMesh( loadedMesh ); // decomposition of faces is not stored in the VTK files if( mesh.getGhostLevels() > 0 ) { distributeSubentities< 1 >( loadedMesh ); } EXPECT_EQ( loadedMesh, mesh ); // cleanup Loading Loading
src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h +137 −0 Original line number Diff line number Diff line Loading @@ -220,6 +220,143 @@ public: CommunicatorType::WaitAll( requests.data(), requests.size() ); } // performs a synchronization of a sparse matrix // - row indices -- ghost indices (recv), ghost neighbors (send) // - column indices -- values to be synchronized (in a CSR-like format) // returns: a tuple of the received rankOffsets, rowPointers and columnIndices arrays template< typename SparsePattern > auto synchronizeSparse( const SparsePattern& pattern ) { TNL_ASSERT_EQ( pattern.getRows(), ghostOffsets[ ghostOffsets.getSize() - 1 ], "invalid sparse pattern matrix" ); const int rank = CommunicatorType::GetRank( group ); const int nproc = CommunicatorType::GetSize( group ); // buffer for asynchronous communication requests std::vector< typename CommunicatorType::Request > requests; Containers::Array< GlobalIndexType, Devices::Host, int > send_rankOffsets( nproc + 1 ), recv_rankOffsets( nproc + 1 ); Containers::Array< GlobalIndexType, Devices::Host, GlobalIndexType > send_rowPointers, send_columnIndices, recv_rowPointers, recv_columnIndices; // sending part { // set rank offsets send_rankOffsets[ 0 ] = 0; for( int i = 0; i < nproc; i++ ) send_rankOffsets[ i + 1 ] = send_rankOffsets[ i ] + ghostNeighborOffsets[ i + 1 ] - ghostNeighborOffsets[ i ]; // allocate row pointers send_rowPointers.setSize( send_rankOffsets[ nproc ] + 1 ); // set row pointers GlobalIndexType rowPtr = 0; send_rowPointers[ rowPtr ] = 0; for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) { const GlobalIndexType rowIdx = ghostNeighbors[ r ]; const auto row = pattern.getRow( rowIdx ); send_rowPointers[ rowPtr + 1 ] = send_rowPointers[ rowPtr ] + row.getSize(); rowPtr++; } } // allocate column indices send_columnIndices.setSize( send_rowPointers[ send_rowPointers.getSize() - 1 ] ); // set column indices for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; const GlobalIndexType rankOffset = send_rankOffsets[ i ]; GlobalIndexType rowBegin = send_rowPointers[ rankOffset ]; for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) { const GlobalIndexType rowIdx = ghostNeighbors[ r ]; const auto row = pattern.getRow( rowIdx ); for( GlobalIndexType c = 0; c < row.getSize(); c++ ) send_columnIndices[ rowBegin + c ] = row.getColumnIndex( c ); rowBegin += row.getSize(); } } for( int i = 0; i < nproc; i++ ) { if( send_rankOffsets[ i + 1 ] == send_rankOffsets[ i ] ) continue; // issue async send operation requests.push_back( CommunicatorType::ISend( send_columnIndices.getData() + send_rowPointers[ send_rankOffsets[ i ] ], send_rowPointers[ send_rankOffsets[ i + 1 ] ] - send_rowPointers[ send_rankOffsets[ i ] ], i, 0, group ) ); } } // receiving part { // set rank offsets recv_rankOffsets[ 0 ] = 0; for( int i = 0; i < nproc; i++ ) recv_rankOffsets[ i + 1 ] = recv_rankOffsets[ i ] + ghostOffsets[ i + 1 ] - ghostOffsets[ i ]; // allocate row pointers recv_rowPointers.setSize( recv_rankOffsets[ nproc ] + 1 ); // set row pointers GlobalIndexType rowPtr = 0; recv_rowPointers[ rowPtr ] = 0; for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; for( GlobalIndexType rowIdx = ghostOffsets[ i ]; rowIdx < ghostOffsets[ i + 1 ]; rowIdx++ ) { const auto row = pattern.getRow( rowIdx ); recv_rowPointers[ rowPtr + 1 ] = recv_rowPointers[ rowPtr ] + row.getSize(); rowPtr++; } } // allocate column indices recv_columnIndices.setSize( recv_rowPointers[ recv_rowPointers.getSize() - 1 ] ); for( int i = 0; i < nproc; i++ ) { if( recv_rankOffsets[ i + 1 ] == recv_rankOffsets[ i ] ) continue; // issue async send operation requests.push_back( CommunicatorType::IRecv( recv_columnIndices.getData() + recv_rowPointers[ recv_rankOffsets[ i ] ], recv_rowPointers[ recv_rankOffsets[ i + 1 ] ] - recv_rowPointers[ recv_rankOffsets[ i ] ], i, 0, group ) ); } } // wait for all communications to finish CommunicatorType::WaitAll( requests.data(), requests.size() ); return std::make_tuple( recv_rankOffsets, recv_rowPointers, recv_columnIndices ); } // public const accessors for the communication pattern matrix and index arrays which were // created in the `initialize` method const auto& getGhostEntitiesCounts() const { return ghostEntitiesCounts; } const auto& getGhostOffsets() const { return ghostOffsets; } const auto& getGhostNeighborOffsets() const { return ghostNeighborOffsets; } const auto& getGhostNeighbors() const { return ghostNeighbors; } protected: // GOTCHA (see above) int gpu_id = 0; Loading
src/TNL/Meshes/DistributedMeshes/distributeSubentities.h 0 → 100644 +195 −0 Original line number Diff line number Diff line /*************************************************************************** distributeSubentities.h - description ------------------- begin : July 13, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ // Implemented by: Jakub Klinkovský #pragma once #include <numeric> // std::iota #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h> #include <TNL/Meshes/MeshDetails/layers/EntityTags/Traits.h> #include <TNL/Meshes/Geometry/getEntityCenter.h> namespace TNL { namespace Meshes { namespace DistributedMeshes { template< int Dimension, typename DistributedMesh > void distributeSubentities( DistributedMesh& mesh ) { using DeviceType = typename DistributedMesh::DeviceType; using GlobalIndexType = typename DistributedMesh::GlobalIndexType; using LocalIndexType = typename DistributedMesh::LocalIndexType; using LocalMesh = typename DistributedMesh::MeshType; using CommunicatorType = typename DistributedMesh::CommunicatorType; static_assert( ! std::is_same< DeviceType, Devices::Cuda >::value, "this method can be called only for host meshes" ); static_assert( 0 < Dimension && Dimension < DistributedMesh::getMeshDimension(), "Vertices and cells cannot be distributed using this method." ); if( mesh.getGhostLevels() <= 0 ) throw std::logic_error( "There are no ghost levels on the distributed mesh." ); const int rank = CommunicatorType::GetRank( mesh.getCommunicationGroup() ); const int nproc = CommunicatorType::GetSize( mesh.getCommunicationGroup() ); // exchange the global vertex index offsets so that each rank can determine the // owner of every vertex by its global index const GlobalIndexType ownVertexStart = mesh.template getGlobalIndices< 0 >().getElement( 0 ); Containers::Array< GlobalIndexType, Devices::Host, int > vertexOffsets( nproc ); { Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc ); sendbuf.setValue( ownVertexStart ); CommunicatorType::Alltoall( sendbuf.getData(), 1, vertexOffsets.getData(), 1, mesh.getCommunicationGroup() ); } auto getVertexOwner = [&] ( GlobalIndexType local_idx ) -> int { const GlobalIndexType global_idx = mesh.template getGlobalIndices< 0 >()[ local_idx ]; for( int i = 0; i < nproc - 1; i++ ) if( vertexOffsets[ i ] <= global_idx && global_idx < vertexOffsets[ i + 1 ] ) return i; return nproc - 1; }; auto getEntityOwner = [&] ( GlobalIndexType local_idx ) -> int { auto entity = mesh.getLocalMesh().template getEntity< Dimension >( local_idx ); int owner = 0; for( LocalIndexType v = 0; v < entity.template getSubentitiesCount< 0 >(); v++ ) { const GlobalIndexType gv = entity.template getSubentityIndex< 0 >( v ); owner = TNL::max( owner, getVertexOwner( gv ) ); } return owner; }; // 1. identify local entities, set the GhostEntity tag on others LocalMesh& localMesh = mesh.getLocalMesh(); localMesh.template forAll< Dimension >( [&] ( GlobalIndexType i ) mutable { if( getEntityOwner( i ) != rank ) localMesh.template addEntityTag< Dimension >( i, EntityTags::GhostEntity ); }); // 2. reorder the entities to make sure that all ghost entities are after local entities // TODO: it would be nice if the mesh initializer could do this { // count local entities GlobalIndexType localEntitiesCount = 0; for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ ) if( ! localMesh.template isGhostEntity< Dimension >( i ) ) ++localEntitiesCount; // create the permutation typename LocalMesh::GlobalIndexArray perm, iperm; perm.setSize( localMesh.template getEntitiesCount< Dimension >() ); iperm.setSize( localMesh.template getEntitiesCount< Dimension >() ); GlobalIndexType localsCount = 0; GlobalIndexType ghostsCount = 0; for( GlobalIndexType j = 0; j < iperm.getSize(); j++ ) { if( localMesh.template isGhostEntity< Dimension >( j ) ) { iperm[ j ] = localEntitiesCount + ghostsCount; perm[ localEntitiesCount + ghostsCount ] = j; ++ghostsCount; } else { iperm[ j ] = localsCount; perm[ localsCount ] = j; ++localsCount; } } // reorder the local mesh localMesh.template reorderEntities< Dimension >( perm, iperm ); } // 3. update entity tags layer (this is actually done as part of the mesh reordering) //localMesh.template updateEntityTagsLayer< Dimension >(); // 4. exchange the counts of local entities between ranks, compute offsets for global indices Containers::Vector< GlobalIndexType, Devices::Host, int > globalOffsets( nproc ); { Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc ); sendbuf.setValue( localMesh.template getGhostEntitiesOffset< Dimension >() ); CommunicatorType::Alltoall( sendbuf.getData(), 1, globalOffsets.getData(), 1, mesh.getCommunicationGroup() ); } globalOffsets.template scan< Algorithms::ScanType::Exclusive >(); // 5. assign global indices to the local entities mesh.template getGlobalIndices< Dimension >().setSize( localMesh.template getEntitiesCount< Dimension >() ); localMesh.template forLocal< Dimension >( [&] ( GlobalIndexType i ) mutable { mesh.template getGlobalIndices< Dimension >()[ i ] = globalOffsets[ rank ] + i; }); // 6. exchange cell data to prepare the communication pattern DistributedMeshSynchronizer< DistributedMesh > synchronizer; synchronizer.initialize( mesh ); // 7. exchange local indices for ghost entities const auto sparseResult = synchronizer.synchronizeSparse( localMesh.template getSubentitiesMatrix< DistributedMesh::getMeshDimension(), Dimension >() ); const auto& rankOffsets = std::get< 0 >( sparseResult ); const auto& rowPointers = std::get< 1 >( sparseResult ); const auto& columnIndices = std::get< 2 >( sparseResult ); // 8. set the global indices of our ghost entities for( int i = 0; i < nproc; i++ ) { if( i == rank ) continue; for( GlobalIndexType cell = synchronizer.getGhostOffsets()[ i ]; cell < synchronizer.getGhostOffsets()[ i + 1 ]; cell++ ) { for( LocalIndexType e = 0; e < mesh.getLocalMesh().template getSubentitiesCount< DistributedMesh::getMeshDimension(), Dimension >( cell ); e++ ) { const GlobalIndexType entityIndex = mesh.getLocalMesh().template getSubentityIndex< DistributedMesh::getMeshDimension(), Dimension >( cell, e ); const int owner = getEntityOwner( entityIndex ); // pick the right owner as we might have received an index from multiple ranks if( owner == i ) { const GlobalIndexType ghostOffset = cell - synchronizer.getGhostOffsets()[ owner ]; // global index = owner's local index + owner's offset const GlobalIndexType globalEntityIndex = columnIndices[ rowPointers[ rankOffsets[ owner ] + ghostOffset ] + e ] + globalOffsets[ owner ]; mesh.template getGlobalIndices< Dimension >()[ entityIndex ] = globalEntityIndex; } } } } // 9. reorder the entities to make sure that global indices are sorted { // prepare vector with an identity permutation std::vector< GlobalIndexType > permutation( localMesh.template getEntitiesCount< Dimension >() ); std::iota( permutation.begin(), permutation.end(), (GlobalIndexType) 0 ); // sort the subarray corresponding to ghost entities by the global index std::stable_sort( permutation.begin() + mesh.getLocalMesh().template getGhostEntitiesOffset< Dimension >(), permutation.end(), [&mesh](auto& left, auto& right) { return mesh.template getGlobalIndices< Dimension >()[ left ] < mesh.template getGlobalIndices< Dimension >()[ right ]; }); // copy the permutation into TNL array and invert typename LocalMesh::GlobalIndexArray perm, iperm; perm.setSize( localMesh.template getEntitiesCount< Dimension >() ); iperm.setSize( localMesh.template getEntitiesCount< Dimension >() ); for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ ) { perm[ i ] = permutation[ i ]; iperm[ perm[ i ] ] = i; } // reorder the mesh mesh.template reorderEntities< Dimension >( perm, iperm ); } } } // namespace DistributedMeshes } // namespace Meshes } // namespace TNL
src/TNL/Meshes/Mesh.h +0 −2 Original line number Diff line number Diff line Loading @@ -261,8 +261,6 @@ class Mesh // Methods for the mesh initializer using StorageBaseType::getPoints; using StorageBaseType::setEntitiesCount; using StorageBaseType::getSubentitiesMatrix; using StorageBaseType::getSuperentitiesMatrix; friend Initializer< MeshConfig >; Loading
src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h +14 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ #include <TNL/Meshes/DefaultConfig.h> #include <TNL/Meshes/VTKTraits.h> #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h> #include <TNL/Meshes/DistributedMeshes/distributeSubentities.h> #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/MPIPrint.h> Loading Loading @@ -263,6 +264,11 @@ struct GridDistributor< TNL::Meshes::Grid< 2, Real, Device, Index > > // set the communication group mesh.setCommunicationGroup( group ); if( overlap > 0 ) { // distribute faces distributeSubentities< 1 >( mesh ); } } static std::map< Index, Index > renumberVertices( const GridType& grid, CoordinatesType rank_sizes ) Loading Loading @@ -607,9 +613,13 @@ void testSynchronizer( const Mesh& mesh ) { testSynchronizerOnDevice< Devices::Host, typename Mesh::Cell >( mesh ); testSynchronizerOnDevice< Devices::Host, typename Mesh::Vertex >( mesh ); if( mesh.template getGlobalIndices< 1 >().getSize() > 0 ) testSynchronizerOnDevice< Devices::Host, typename Mesh::Face >( mesh ); #ifdef HAVE_CUDA testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Cell >( mesh ); testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Vertex >( mesh ); if( mesh.template getGlobalIndices< 1 >().getSize() > 0 ) testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Face >( mesh ); #endif } Loading Loading @@ -721,6 +731,10 @@ TEST( DistributedMeshTest, PVTUWriterReader ) EXPECT_EQ( reader.getMeshType(), "Meshes::DistributedMesh" ); Mesh loadedMesh; reader.loadMesh( loadedMesh ); // decomposition of faces is not stored in the VTK files if( mesh.getGhostLevels() > 0 ) { distributeSubentities< 1 >( loadedMesh ); } EXPECT_EQ( loadedMesh, mesh ); // cleanup Loading