Commit d2468f3d authored by Jakub Klinkovský
Browse files

updateBoundaryIndices for CUDA

parent 38d4bde9
Loading
Loading
Loading
Loading
+63 −21
Original line number Diff line number Diff line
@@ -93,6 +93,7 @@ public:

   bool updateBoundaryIndices( DimensionTag )
   {
      if( std::is_same< Device, Devices::Host >::value ) {
         // we can't just sum an array/vector of bools, because the result would also be bool
         // TODO: perhaps Containers::Vector::sum should provide a template parameter to force the result type
         // (by default it would be the RealType of the vector)
@@ -105,7 +106,6 @@ public:
             ! interiorIndices.setSize( boundaryTags.getSize() - boundaryEntities ) )
            return false;

      // TODO: parallelize, even on CUDA
         GlobalIndexType b = 0;
         GlobalIndexType i = 0;
         while( b + i < boundaryTags.getSize() ) {
@@ -115,6 +115,48 @@ public:
            else
               interiorIndices[ i++ ] = e;
         }
      }
      // TODO: parallelize directly on the device
      else {
         using BoundaryTagsHostArray = typename BoundaryTagsArray::HostType;
         using OrderingHostArray     = typename OrderingArray::HostType;

         BoundaryTagsHostArray hostBoundaryTags;
         OrderingHostArray hostBoundaryIndices;
         OrderingHostArray hostInteriorIndices;

         if( ! hostBoundaryTags.setLike( boundaryTags ) )
            return false;
         hostBoundaryTags = boundaryTags;

         // we can't just sum an array/vector of bools, because the result would also be bool
         // TODO: perhaps Containers::Vector::sum should provide a template parameter to force the result type
         // (by default it would be the RealType of the vector)
         GlobalIndexType boundaryEntities = 0;
         for( GlobalIndexType i = 0; i < boundaryTags.getSize(); i++ )
            if( hostBoundaryTags[ i ] )
               boundaryEntities++;

         if( ! hostBoundaryIndices.setSize( boundaryEntities ) ||
             ! hostInteriorIndices.setSize( boundaryTags.getSize() - boundaryEntities ) )
            return false;

         GlobalIndexType b = 0;
         GlobalIndexType i = 0;
         while( b + i < boundaryTags.getSize() ) {
            const GlobalIndexType e = b + i;
            if( hostBoundaryTags[ e ] )
               hostBoundaryIndices[ b++ ] = e;
            else
               hostInteriorIndices[ i++ ] = e;
         }

         if( ! boundaryIndices.setLike( hostBoundaryIndices ) ||
             ! interiorIndices.setLike( hostInteriorIndices ) )
            return false;
         boundaryIndices = hostBoundaryIndices;
         interiorIndices = hostInteriorIndices;
      }

      return true;
   }
+16 −3
Original line number Diff line number Diff line
@@ -81,24 +81,37 @@ void testCopyAssignment( const Object& obj )
}

template< typename Mesh >
void testCopyToCuda( const Mesh& mesh )
void testMeshOnCuda( const Mesh& mesh )
{
   // Exercises the CUDA variant of the given host mesh: host->CUDA, CUDA->CUDA
   // and CUDA->host copies (both copy-construction and assignment), followed by
   // a save/load round trip through the file "mesh.tnl" in both directions.
   // Every result is compared against the original `mesh`.
   // When HAVE_CUDA is not defined the body is compiled out and nothing is tested.
#ifdef HAVE_CUDA
   using DeviceMesh = Meshes::Mesh< typename Mesh::Config, Devices::Cuda >;

   // test host->CUDA copy
   DeviceMesh dmesh1( mesh );
   EXPECT_EQ( dmesh1, mesh );
   DeviceMesh dmesh2;
   dmesh2 = mesh;
   EXPECT_EQ( dmesh2, mesh );

   // test CUDA->CUDA copy
   testCopyAssignment( dmesh1 );

   // test CUDA->host copy (copy back to host)
   Mesh mesh2( dmesh1 );
   EXPECT_EQ( mesh2, mesh );
   Mesh mesh3;
   mesh3 = dmesh1;
   // check the mesh produced by the assignment; mesh2 was already verified above
   EXPECT_EQ( mesh3, mesh );

   // test load from file to CUDA
   ASSERT_TRUE( mesh.save( "mesh.tnl" ) );
   ASSERT_TRUE( dmesh1.load( "mesh.tnl" ) );
   EXPECT_EQ( dmesh1, mesh );

   // test save into file from CUDA
   ASSERT_TRUE( dmesh1.save( "mesh.tnl" ) );
   ASSERT_TRUE( mesh2.load( "mesh.tnl" ) );
   EXPECT_EQ( mesh2, mesh );
#endif
}

@@ -111,7 +124,7 @@ void testFinishedMesh( const Mesh& mesh )
   ASSERT_EQ( mesh, mesh2 );
   compareStringRepresentation( mesh, mesh2 );
   testCopyAssignment( mesh );
   testCopyToCuda( mesh );
   testMeshOnCuda( mesh );
}

TEST( MeshTest, TwoTrianglesTest )