Added function distributeSubentities and tests for DistributedMeshSynchronizer on faces (2a13c0dc) · Commits · TNL / tnl-dev

src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h

+137 −0

Original line number	Diff line number	Diff line
		@@ -220,6 +220,143 @@ public:
		CommunicatorType::WaitAll( requests.data(), requests.size() );
		}

		// performs a synchronization of a sparse matrix
		// - row indices -- ghost indices (recv), ghost neighbors (send)
		// - column indices -- values to be synchronized (in a CSR-like format)
		// returns: a tuple of the received rankOffsets, rowPointers and columnIndices arrays
		template< typename SparsePattern >
		auto
		synchronizeSparse( const SparsePattern& pattern )
		{
		TNL_ASSERT_EQ( pattern.getRows(), ghostOffsets[ ghostOffsets.getSize() - 1 ], "invalid sparse pattern matrix" );

		const int rank = CommunicatorType::GetRank( group );
		const int nproc = CommunicatorType::GetSize( group );

		// buffer for asynchronous communication requests
		std::vector< typename CommunicatorType::Request > requests;

		Containers::Array< GlobalIndexType, Devices::Host, int > send_rankOffsets( nproc + 1 ), recv_rankOffsets( nproc + 1 );
		Containers::Array< GlobalIndexType, Devices::Host, GlobalIndexType > send_rowPointers, send_columnIndices, recv_rowPointers, recv_columnIndices;

		// sending part
		{
		// set rank offsets
		send_rankOffsets[ 0 ] = 0;
		for( int i = 0; i < nproc; i++ )
		send_rankOffsets[ i + 1 ] = send_rankOffsets[ i ] + ghostNeighborOffsets[ i + 1 ] - ghostNeighborOffsets[ i ];

		// allocate row pointers
		send_rowPointers.setSize( send_rankOffsets[ nproc ] + 1 );

		// set row pointers
		GlobalIndexType rowPtr = 0;
		send_rowPointers[ rowPtr ] = 0;
		for( int i = 0; i < nproc; i++ ) {
		if( i == rank )
		continue;
		for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) {
		const GlobalIndexType rowIdx = ghostNeighbors[ r ];
		const auto row = pattern.getRow( rowIdx );
		send_rowPointers[ rowPtr + 1 ] = send_rowPointers[ rowPtr ] + row.getSize();
		rowPtr++;
		}
		}

		// allocate column indices
		send_columnIndices.setSize( send_rowPointers[ send_rowPointers.getSize() - 1 ] );

		// set column indices
		for( int i = 0; i < nproc; i++ ) {
		if( i == rank )
		continue;
		const GlobalIndexType rankOffset = send_rankOffsets[ i ];
		GlobalIndexType rowBegin = send_rowPointers[ rankOffset ];
		for( GlobalIndexType r = ghostNeighborOffsets[ i ]; r < ghostNeighborOffsets[ i + 1 ]; r++ ) {
		const GlobalIndexType rowIdx = ghostNeighbors[ r ];
		const auto row = pattern.getRow( rowIdx );
		for( GlobalIndexType c = 0; c < row.getSize(); c++ )
		send_columnIndices[ rowBegin + c ] = row.getColumnIndex( c );
		rowBegin += row.getSize();
		}
		}

		for( int i = 0; i < nproc; i++ ) {
		if( send_rankOffsets[ i + 1 ] == send_rankOffsets[ i ] )
		continue;
		// issue async send operation
		requests.push_back( CommunicatorType::ISend(
		send_columnIndices.getData() + send_rowPointers[ send_rankOffsets[ i ] ],
		send_rowPointers[ send_rankOffsets[ i + 1 ] ] - send_rowPointers[ send_rankOffsets[ i ] ],
		i, 0, group ) );
		}
		}

		// receiving part
		{
		// set rank offsets
		recv_rankOffsets[ 0 ] = 0;
		for( int i = 0; i < nproc; i++ )
		recv_rankOffsets[ i + 1 ] = recv_rankOffsets[ i ] + ghostOffsets[ i + 1 ] - ghostOffsets[ i ];

		// allocate row pointers
		recv_rowPointers.setSize( recv_rankOffsets[ nproc ] + 1 );

		// set row pointers
		GlobalIndexType rowPtr = 0;
		recv_rowPointers[ rowPtr ] = 0;
		for( int i = 0; i < nproc; i++ ) {
		if( i == rank )
		continue;
		for( GlobalIndexType rowIdx = ghostOffsets[ i ]; rowIdx < ghostOffsets[ i + 1 ]; rowIdx++ ) {
		const auto row = pattern.getRow( rowIdx );
		recv_rowPointers[ rowPtr + 1 ] = recv_rowPointers[ rowPtr ] + row.getSize();
		rowPtr++;
		}
		}

		// allocate column indices
		recv_columnIndices.setSize( recv_rowPointers[ recv_rowPointers.getSize() - 1 ] );

		for( int i = 0; i < nproc; i++ ) {
		if( recv_rankOffsets[ i + 1 ] == recv_rankOffsets[ i ] )
		continue;
		// issue async send operation
		requests.push_back( CommunicatorType::IRecv(
		recv_columnIndices.getData() + recv_rowPointers[ recv_rankOffsets[ i ] ],
		recv_rowPointers[ recv_rankOffsets[ i + 1 ] ] - recv_rowPointers[ recv_rankOffsets[ i ] ],
		i, 0, group ) );
		}
		}

		// wait for all communications to finish
		CommunicatorType::WaitAll( requests.data(), requests.size() );

		return std::make_tuple( recv_rankOffsets, recv_rowPointers, recv_columnIndices );
		}

		// public const accessors for the communication pattern matrix and index arrays which were
		// created in the `initialize` method
		const auto& getGhostEntitiesCounts() const
		{
		return ghostEntitiesCounts;
		}

		const auto& getGhostOffsets() const
		{
		return ghostOffsets;
		}

		const auto& getGhostNeighborOffsets() const
		{
		return ghostNeighborOffsets;
		}

		const auto& getGhostNeighbors() const
		{
		return ghostNeighbors;
		}

		protected:
		// GOTCHA (see above)
		int gpu_id = 0;

src/TNL/Meshes/DistributedMeshes/distributeSubentities.h

0 → 100644

+195 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		distributeSubentities.h - description
		-------------------
		begin : July 13, 2020
		copyright : (C) 2020 by Tomas Oberhuber et al.
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		// Implemented by: Jakub Klinkovský

		#pragma once

		#include <numeric> // std::iota

		#include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h>
		#include <TNL/Meshes/MeshDetails/layers/EntityTags/Traits.h>

		#include <TNL/Meshes/Geometry/getEntityCenter.h>

		namespace TNL {
		namespace Meshes {
		namespace DistributedMeshes {

		template< int Dimension, typename DistributedMesh >
		void
		distributeSubentities( DistributedMesh& mesh )
		{
		using DeviceType = typename DistributedMesh::DeviceType;
		using GlobalIndexType = typename DistributedMesh::GlobalIndexType;
		using LocalIndexType = typename DistributedMesh::LocalIndexType;
		using LocalMesh = typename DistributedMesh::MeshType;
		using CommunicatorType = typename DistributedMesh::CommunicatorType;

		static_assert( ! std::is_same< DeviceType, Devices::Cuda >::value,
		"this method can be called only for host meshes" );
		static_assert( 0 < Dimension && Dimension < DistributedMesh::getMeshDimension(),
		"Vertices and cells cannot be distributed using this method." );
		if( mesh.getGhostLevels() <= 0 )
		throw std::logic_error( "There are no ghost levels on the distributed mesh." );

		const int rank = CommunicatorType::GetRank( mesh.getCommunicationGroup() );
		const int nproc = CommunicatorType::GetSize( mesh.getCommunicationGroup() );

		// exchange the global vertex index offsets so that each rank can determine the
		// owner of every vertex by its global index
		const GlobalIndexType ownVertexStart = mesh.template getGlobalIndices< 0 >().getElement( 0 );
		Containers::Array< GlobalIndexType, Devices::Host, int > vertexOffsets( nproc );
		{
		Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc );
		sendbuf.setValue( ownVertexStart );
		CommunicatorType::Alltoall( sendbuf.getData(), 1,
		vertexOffsets.getData(), 1,
		mesh.getCommunicationGroup() );
		}

		auto getVertexOwner = [&] ( GlobalIndexType local_idx ) -> int
		{
		const GlobalIndexType global_idx = mesh.template getGlobalIndices< 0 >()[ local_idx ];
		for( int i = 0; i < nproc - 1; i++ )
		if( vertexOffsets[ i ] <= global_idx && global_idx < vertexOffsets[ i + 1 ] )
		return i;
		return nproc - 1;
		};

		auto getEntityOwner = [&] ( GlobalIndexType local_idx ) -> int
		{
		auto entity = mesh.getLocalMesh().template getEntity< Dimension >( local_idx );
		int owner = 0;
		for( LocalIndexType v = 0; v < entity.template getSubentitiesCount< 0 >(); v++ ) {
		const GlobalIndexType gv = entity.template getSubentityIndex< 0 >( v );
		owner = TNL::max( owner, getVertexOwner( gv ) );
		}
		return owner;
		};

		// 1. identify local entities, set the GhostEntity tag on others
		LocalMesh& localMesh = mesh.getLocalMesh();
		localMesh.template forAll< Dimension >( [&] ( GlobalIndexType i ) mutable {
		if( getEntityOwner( i ) != rank )
		localMesh.template addEntityTag< Dimension >( i, EntityTags::GhostEntity );
		});

		// 2. reorder the entities to make sure that all ghost entities are after local entities
		// TODO: it would be nice if the mesh initializer could do this
		{
		// count local entities
		GlobalIndexType localEntitiesCount = 0;
		for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ )
		if( ! localMesh.template isGhostEntity< Dimension >( i ) )
		++localEntitiesCount;

		// create the permutation
		typename LocalMesh::GlobalIndexArray perm, iperm;
		perm.setSize( localMesh.template getEntitiesCount< Dimension >() );
		iperm.setSize( localMesh.template getEntitiesCount< Dimension >() );
		GlobalIndexType localsCount = 0;
		GlobalIndexType ghostsCount = 0;
		for( GlobalIndexType j = 0; j < iperm.getSize(); j++ ) {
		if( localMesh.template isGhostEntity< Dimension >( j ) ) {
		iperm[ j ] = localEntitiesCount + ghostsCount;
		perm[ localEntitiesCount + ghostsCount ] = j;
		++ghostsCount;
		}
		else {
		iperm[ j ] = localsCount;
		perm[ localsCount ] = j;
		++localsCount;
		}
		}

		// reorder the local mesh
		localMesh.template reorderEntities< Dimension >( perm, iperm );
		}

		// 3. update entity tags layer (this is actually done as part of the mesh reordering)
		//localMesh.template updateEntityTagsLayer< Dimension >();

		// 4. exchange the counts of local entities between ranks, compute offsets for global indices
		Containers::Vector< GlobalIndexType, Devices::Host, int > globalOffsets( nproc );
		{
		Containers::Array< GlobalIndexType, Devices::Host, int > sendbuf( nproc );
		sendbuf.setValue( localMesh.template getGhostEntitiesOffset< Dimension >() );
		CommunicatorType::Alltoall( sendbuf.getData(), 1,
		globalOffsets.getData(), 1,
		mesh.getCommunicationGroup() );
		}
		globalOffsets.template scan< Algorithms::ScanType::Exclusive >();

		// 5. assign global indices to the local entities
		mesh.template getGlobalIndices< Dimension >().setSize( localMesh.template getEntitiesCount< Dimension >() );
		localMesh.template forLocal< Dimension >( [&] ( GlobalIndexType i ) mutable {
		mesh.template getGlobalIndices< Dimension >()[ i ] = globalOffsets[ rank ] + i;
		});

		// 6. exchange cell data to prepare the communication pattern
		DistributedMeshSynchronizer< DistributedMesh > synchronizer;
		synchronizer.initialize( mesh );

		// 7. exchange local indices for ghost entities
		const auto sparseResult = synchronizer.synchronizeSparse( localMesh.template getSubentitiesMatrix< DistributedMesh::getMeshDimension(), Dimension >() );
		const auto& rankOffsets = std::get< 0 >( sparseResult );
		const auto& rowPointers = std::get< 1 >( sparseResult );
		const auto& columnIndices = std::get< 2 >( sparseResult );

		// 8. set the global indices of our ghost entities
		for( int i = 0; i < nproc; i++ ) {
		if( i == rank )
		continue;
		for( GlobalIndexType cell = synchronizer.getGhostOffsets()[ i ]; cell < synchronizer.getGhostOffsets()[ i + 1 ]; cell++ ) {
		for( LocalIndexType e = 0; e < mesh.getLocalMesh().template getSubentitiesCount< DistributedMesh::getMeshDimension(), Dimension >( cell ); e++ ) {
		const GlobalIndexType entityIndex = mesh.getLocalMesh().template getSubentityIndex< DistributedMesh::getMeshDimension(), Dimension >( cell, e );
		const int owner = getEntityOwner( entityIndex );
		// pick the right owner as we might have received an index from multiple ranks
		if( owner == i ) {
		const GlobalIndexType ghostOffset = cell - synchronizer.getGhostOffsets()[ owner ];
		// global index = owner's local index + owner's offset
		const GlobalIndexType globalEntityIndex = columnIndices[ rowPointers[ rankOffsets[ owner ] + ghostOffset ] + e ] + globalOffsets[ owner ];
		mesh.template getGlobalIndices< Dimension >()[ entityIndex ] = globalEntityIndex;
		}
		}
		}
		}

		// 9. reorder the entities to make sure that global indices are sorted
		{
		// prepare vector with an identity permutation
		std::vector< GlobalIndexType > permutation( localMesh.template getEntitiesCount< Dimension >() );
		std::iota( permutation.begin(), permutation.end(), (GlobalIndexType) 0 );

		// sort the subarray corresponding to ghost entities by the global index
		std::stable_sort( permutation.begin() + mesh.getLocalMesh().template getGhostEntitiesOffset< Dimension >(),
		permutation.end(),
		[&mesh](auto& left, auto& right) {
		return mesh.template getGlobalIndices< Dimension >()[ left ] < mesh.template getGlobalIndices< Dimension >()[ right ];
		});

		// copy the permutation into TNL array and invert
		typename LocalMesh::GlobalIndexArray perm, iperm;
		perm.setSize( localMesh.template getEntitiesCount< Dimension >() );
		iperm.setSize( localMesh.template getEntitiesCount< Dimension >() );
		for( GlobalIndexType i = 0; i < localMesh.template getEntitiesCount< Dimension >(); i++ ) {
		perm[ i ] = permutation[ i ];
		iperm[ perm[ i ] ] = i;
		}

		// reorder the mesh
		mesh.template reorderEntities< Dimension >( perm, iperm );
		}
		}

		} // namespace DistributedMeshes
		} // namespace Meshes
		} // namespace TNL

src/TNL/Meshes/Mesh.h

+0 −2

Original line number	Diff line number	Diff line
		@@ -261,8 +261,6 @@ class Mesh
		// Methods for the mesh initializer
		using StorageBaseType::getPoints;
		using StorageBaseType::setEntitiesCount;
		using StorageBaseType::getSubentitiesMatrix;
		using StorageBaseType::getSuperentitiesMatrix;

		friend Initializer< MeshConfig >;

src/UnitTests/Meshes/DistributedMeshes/DistributedMeshTest.h

+14 −0

Original line number	Diff line number	Diff line
		@@ -14,6 +14,7 @@
		#include <TNL/Meshes/DefaultConfig.h>
		#include <TNL/Meshes/VTKTraits.h>
		#include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
		#include <TNL/Meshes/DistributedMeshes/distributeSubentities.h>
		#include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h>
		#include <TNL/Communicators/MpiCommunicator.h>
		#include <TNL/Communicators/MPIPrint.h>
		@@ -263,6 +264,11 @@ struct GridDistributor< TNL::Meshes::Grid< 2, Real, Device, Index > >

		// set the communication group
		mesh.setCommunicationGroup( group );

		if( overlap > 0 ) {
		// distribute faces
		distributeSubentities< 1 >( mesh );
		}
		}

		static std::map< Index, Index > renumberVertices( const GridType& grid, CoordinatesType rank_sizes )
		@@ -607,9 +613,13 @@ void testSynchronizer( const Mesh& mesh )
		{
		testSynchronizerOnDevice< Devices::Host, typename Mesh::Cell >( mesh );
		testSynchronizerOnDevice< Devices::Host, typename Mesh::Vertex >( mesh );
		if( mesh.template getGlobalIndices< 1 >().getSize() > 0 )
		testSynchronizerOnDevice< Devices::Host, typename Mesh::Face >( mesh );
		#ifdef HAVE_CUDA
		testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Cell >( mesh );
		testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Vertex >( mesh );
		if( mesh.template getGlobalIndices< 1 >().getSize() > 0 )
		testSynchronizerOnDevice< Devices::Cuda, typename Mesh::Face >( mesh );
		#endif
		}

		@@ -721,6 +731,10 @@ TEST( DistributedMeshTest, PVTUWriterReader )
		EXPECT_EQ( reader.getMeshType(), "Meshes::DistributedMesh" );
		Mesh loadedMesh;
		reader.loadMesh( loadedMesh );
		// decomposition of faces is not stored in the VTK files
		if( mesh.getGhostLevels() > 0 ) {
		distributeSubentities< 1 >( loadedMesh );
		}
		EXPECT_EQ( loadedMesh, mesh );

		// cleanup