updateBoundaryIndices for CUDA (d2468f3d) · Commits · TNL / tnl-dev

src/TNL/Meshes/MeshDetails/layers/MeshBoundaryTagsLayer.h

+63 −21

Original line number	Diff line number	Diff line
		@@ -93,6 +93,7 @@ public:

		bool updateBoundaryIndices( DimensionTag )
		{
		if( std::is_same< Device, Devices::Host >::value ) {
		// we can't just sum an array/vector of bools, because the result would also be bool
		// TODO: perhaps Containers::Vector::sum should provide a template parameter to force the result type
		// (by default it would be the RealType of the vector)
		@@ -105,7 +106,6 @@ public:
		! interiorIndices.setSize( boundaryTags.getSize() - boundaryEntities ) )
		return false;

		// TODO: parallelize, even on CUDA
		GlobalIndexType b = 0;
		GlobalIndexType i = 0;
		while( b + i < boundaryTags.getSize() ) {
		@@ -115,6 +115,48 @@ public:
		else
		interiorIndices[ i++ ] = e;
		}
		}
		// TODO: parallelize directly on the device
		else {
		using BoundaryTagsHostArray = typename BoundaryTagsArray::HostType;
		using OrderingHostArray = typename OrderingArray::HostType;

		BoundaryTagsHostArray hostBoundaryTags;
		OrderingHostArray hostBoundaryIndices;
		OrderingHostArray hostInteriorIndices;

		if( ! hostBoundaryTags.setLike( boundaryTags ) )
		return false;
		hostBoundaryTags = boundaryTags;

		// we can't just sum an array/vector of bools, because the result would also be bool
		// TODO: perhaps Containers::Vector::sum should provide a template parameter to force the result type
		// (by default it would be the RealType of the vector)
		GlobalIndexType boundaryEntities = 0;
		for( GlobalIndexType i = 0; i < boundaryTags.getSize(); i++ )
		if( hostBoundaryTags[ i ] )
		boundaryEntities++;

		if( ! hostBoundaryIndices.setSize( boundaryEntities ) ||
		! hostInteriorIndices.setSize( boundaryTags.getSize() - boundaryEntities ) )
		return false;

		GlobalIndexType b = 0;
		GlobalIndexType i = 0;
		while( b + i < boundaryTags.getSize() ) {
		const GlobalIndexType e = b + i;
		if( hostBoundaryTags[ e ] )
		hostBoundaryIndices[ b++ ] = e;
		else
		hostInteriorIndices[ i++ ] = e;
		}

		if( ! boundaryIndices.setLike( hostBoundaryIndices ) ||
		! interiorIndices.setLike( hostInteriorIndices ) )
		return false;
		boundaryIndices = hostBoundaryIndices;
		interiorIndices = hostInteriorIndices;
		}

		return true;
		}

src/UnitTests/Meshes/MeshTest.h

+16 −3

Original line number	Diff line number	Diff line
		@@ -81,24 +81,37 @@ void testCopyAssignment( const Object& obj )
		}

		// Checks that a host mesh survives a round trip through its CUDA counterpart:
		// host->CUDA copy (constructor and assignment), CUDA->CUDA copy, CUDA->host copy
		// (constructor and assignment), and save/load through the file "mesh.tnl"
		// (a relative path) in both directions.
		//
		// The whole body is compiled only when HAVE_CUDA is defined; otherwise the
		// function is a no-op.
		template< typename Mesh >
		void testMeshOnCuda( const Mesh& mesh )
		{
		#ifdef HAVE_CUDA
		   using DeviceMesh = Meshes::Mesh< typename Mesh::Config, Devices::Cuda >;

		   // test host->CUDA copy
		   DeviceMesh dmesh1( mesh );
		   EXPECT_EQ( dmesh1, mesh );
		   DeviceMesh dmesh2;
		   dmesh2 = mesh;
		   EXPECT_EQ( dmesh2, mesh );

		   // test CUDA->CUDA copy
		   testCopyAssignment( dmesh1 );

		   // test CUDA->host copy
		   Mesh mesh2( dmesh1 );
		   EXPECT_EQ( mesh2, mesh );
		   Mesh mesh3;
		   mesh3 = dmesh1;
		   // verify the copy-assigned mesh (mesh3), not the copy-constructed one again
		   EXPECT_EQ( mesh3, mesh );

		   // test load from file to CUDA
		   ASSERT_TRUE( mesh.save( "mesh.tnl" ) );
		   ASSERT_TRUE( dmesh1.load( "mesh.tnl" ) );
		   EXPECT_EQ( dmesh1, mesh );

		   // test save into file from CUDA
		   ASSERT_TRUE( dmesh1.save( "mesh.tnl" ) );
		   ASSERT_TRUE( mesh2.load( "mesh.tnl" ) );
		   EXPECT_EQ( mesh2, mesh );
		#endif
		}

		@@ -111,7 +124,7 @@ void testFinishedMesh( const Mesh& mesh )
		ASSERT_EQ( mesh, mesh2 );
		compareStringRepresentation( mesh, mesh2 );
		testCopyAssignment( mesh );
		testCopyToCuda( mesh );
		testMeshOnCuda( mesh );
		}

		TEST( MeshTest, TwoTrianglesTest )