Commit 30c87144 authored by Tomáš Oberhuber
Browse files

Testing the explicit solver performance in CUDA.

parent fa0a8417
Loading
Loading
Loading
Loading
+35 −27
Original line number Diff line number Diff line
@@ -289,32 +289,32 @@ template< typename Real,
          bool processOnlyBoundaryEntities >
__global__ void 
tnlGridTraverser2D(
   const tnlGrid< 2, Real, tnlCuda, Index >* grid,
   UserData* userData,
   const typename GridEntity::CoordinatesType* begin,
   const typename GridEntity::CoordinatesType* end,
   const typename GridEntity::CoordinatesType* entityOrientation,
   const typename GridEntity::CoordinatesType* entityBasis,   
   const tnlGrid< 2, Real, tnlCuda, Index > grid,
   UserData userData,
   const typename GridEntity::CoordinatesType begin,
   const typename GridEntity::CoordinatesType end,
   const typename GridEntity::CoordinatesType entityOrientation,
   const typename GridEntity::CoordinatesType entityBasis,   
   const Index gridXIdx,
   const Index gridYIdx )
{
   typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
   typename GridType::CoordinatesType coordinates;

   coordinates.x() = begin->x() + ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   coordinates.y() = begin->y() + ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;  
   coordinates.x() = begin.x() + ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   coordinates.y() = begin.y() + ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;  
   
   GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis );
   GridEntity entity( grid, coordinates, entityOrientation, entityBasis );

   if( entity.getCoordinates().x() <= end->x() &&
       entity.getCoordinates().y() <= end->y() )
   if( entity.getCoordinates().x() <= end.x() &&
       entity.getCoordinates().y() <= end.y() )
   {
      entity.refresh();
      if( ! processOnlyBoundaryEntities || entity.isBoundaryEntity() )
      {         
         EntitiesProcessor::processEntity
         ( *grid,
           *userData,
         ( grid,
           userData,
           entity );
      }
   }
@@ -341,12 +341,19 @@ processEntities(
   UserData& userData )
{
#ifdef HAVE_CUDA   
   CoordinatesType* kernelBegin = tnlCuda::passToDevice( begin );
   /*typedef tnlStaticArray< 4, CoordinatesType > Coords;
   Coords c;
   c[ 0 ] = begin;
   c[ 1 ] = end;
   c[ 2 ] = entityOrientation;
   c[ 3 ] = entityBasis;
   Coords* kernelC = tnlCuda::passToDevice( c );*/
   /*CoordinatesType* kernelBegin = tnlCuda::passToDevice( begin );
   CoordinatesType* kernelEnd = tnlCuda::passToDevice( end );
   CoordinatesType* kernelEntityOrientation = tnlCuda::passToDevice( entityOrientation );
   CoordinatesType* kernelEntityBasis = tnlCuda::passToDevice( entityBasis );
   typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid );
   UserData* kernelUserData = tnlCuda::passToDevice( userData );
   CoordinatesType* kernelEntityBasis = tnlCuda::passToDevice( entityBasis );*/
   //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid );
   //UserData* kernelUserData = tnlCuda::passToDevice( userData );
      
   dim3 cudaBlockSize( 16, 16 );
   dim3 cudaBlocks;
@@ -359,23 +366,24 @@ processEntities(
      for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
         tnlGridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities >
            <<< cudaBlocks, cudaBlockSize >>>
            ( kernelGrid,
              kernelUserData,
              kernelBegin,
              kernelEnd,
              kernelEntityOrientation,
              kernelEntityBasis,
            ( grid,
              userData,
              begin,
              end,
              entityOrientation,
              entityBasis,
              gridXIdx,
              gridYIdx );
      
   cudaThreadSynchronize();
   checkCudaDevice;   
   tnlCuda::freeFromDevice( kernelGrid );
   tnlCuda::freeFromDevice( kernelBegin );
   //tnlCuda::freeFromDevice( kernelGrid );
   //tnlCuda::freeFromDevice( kernelC );
   /*tnlCuda::freeFromDevice( kernelBegin );
   tnlCuda::freeFromDevice( kernelEnd );
   tnlCuda::freeFromDevice( kernelEntityOrientation );
   tnlCuda::freeFromDevice( kernelEntityBasis );
   tnlCuda::freeFromDevice( kernelUserData );
   tnlCuda::freeFromDevice( kernelEntityBasis );*/
   //tnlCuda::freeFromDevice( kernelUserData );
   checkCudaDevice;
#endif
}
+2 −2
Original line number Diff line number Diff line
@@ -230,11 +230,11 @@ getExplicitRHS( const RealType& time,
      this->rightHandSide,
      this->u,
      fu );
   tnlBoundaryConditionsSetter< MeshFunctionType, BoundaryCondition > boundaryConditionsSetter;
   /*tnlBoundaryConditionsSetter< MeshFunctionType, BoundaryCondition > boundaryConditionsSetter;
   boundaryConditionsSetter.template apply< typename Mesh::Cell >(
      this->boundaryCondition,
      time + tau,
      this->u );
      this->u );*/
   
   //fu.write( "fu.txt", "gnuplot" );
   //this->u.write( "u.txt", "gnuplot");
+70 −22
Original line number Diff line number Diff line
@@ -28,17 +28,24 @@ template< typename Real,
          typename RightHandSide >
class tnlExplicitUpdaterTraverserUserData
{
   public:

      const Real *time;
      /*const DifferentialOperator differentialOperator;

      const BoundaryConditions boundaryConditions;

      const RightHandSide rightHandSide;

      const DifferentialOperator* differentialOperator;
      MeshFunction u, fu;*/
      
      const BoundaryConditions* boundaryConditions;
      char data[ sizeof( DifferentialOperator ) + 
                 sizeof( BoundaryConditions ) + 
                 sizeof( RightHandSide ) +
                 2 * sizeof( MeshFunction ) ];

      public:

      const RightHandSide* rightHandSide;
         const Real time;         

      MeshFunction *u, *fu;

      tnlExplicitUpdaterTraverserUserData( const Real& time,
                                           const DifferentialOperator& differentialOperator,
@@ -46,14 +53,55 @@ class tnlExplicitUpdaterTraverserUserData
                                           const RightHandSide& rightHandSide,
                                           MeshFunction& u,
                                           MeshFunction& fu )
      : time( &time ),
        differentialOperator( &differentialOperator ),
        boundaryConditions( &boundaryConditions ),
        rightHandSide( &rightHandSide ),
        u( &u ),
        fu( &fu )
      : time( time )
        /*differentialOperator( differentialOperator ),
        boundaryConditions( boundaryConditions ),
        rightHandSide( rightHandSide ),
        u( u ),
        fu( fu )*/
      {
         char* ptr = data;
         memcpy( ptr, &differentialOperator, sizeof( DifferentialOperator ) );
         ptr +=  sizeof( DifferentialOperator );
         memcpy( ptr, &boundaryConditions, sizeof( BoundaryConditions ) );
         ptr += sizeof( BoundaryConditions );
         memcpy( ptr, &rightHandSide, sizeof( RightHandSide ) );
         ptr += sizeof( RightHandSide );
         memcpy( ptr, &u, sizeof( MeshFunction ) );
         ptr += sizeof( MeshFunction );
         memcpy( ptr, &fu, sizeof( MeshFunction ) );
      };
      
      DifferentialOperator& differentialOperator()
      {
         return * ( DifferentialOperator* ) data;
      }
      
      BoundaryConditions& boundaryConditions()
      {
         return * ( BoundaryConditions* ) & data[ sizeof( DifferentialOperator ) ];
      }
      
      RightHandSide& rightHandSide()
      {
         return * ( RightHandSide* ) & data[ sizeof( DifferentialOperator ) +
                                             sizeof( BoundaryConditions ) ];
      }
      
      MeshFunction& u()
      {
         return * ( MeshFunction* ) & data[ sizeof( DifferentialOperator ) +
                                            sizeof( BoundaryConditions ) + 
                                            sizeof( RightHandSide )];
      }
      
      MeshFunction& fu()
      {
         return * ( MeshFunction* ) & data[ sizeof( DifferentialOperator ) +
                                            sizeof( BoundaryConditions ) + 
                                            sizeof( RightHandSide ) + 
                                            sizeof( MeshFunction ) ];
      }
};


@@ -102,10 +150,10 @@ class tnlExplicitUpdater
                                              TraverserUserData& userData,
                                              const GridEntity& entity )
            {
               ( *userData.u )( entity ) = userData.boundaryConditions->operator()
               ( *userData.u,
               ( userData.u() )( entity ) = userData.boundaryConditions().operator()
               ( userData.u(),
                 entity,
                 *userData.time );
                 userData.time );
            }

      };
@@ -122,18 +170,18 @@ class tnlExplicitUpdater
                                              TraverserUserData& userData,
                                              const EntityType& entity )
            {
               ( *userData.fu)( entity ) = 
                  userData.differentialOperator->operator()(
                     *userData.u,
               ( userData.fu())( entity ) = 
                  userData.differentialOperator().operator()(
                     userData.u(),
                     entity,
                     *userData.time );
                     userData.time );

               typedef tnlFunctionAdapter< MeshType, RightHandSide > FunctionAdapter;
               ( * userData.fu )( entity ) += 
               (  userData.fu() )( entity ) += 
                  FunctionAdapter::getValue(
                     *userData.rightHandSide,
                     userData.rightHandSide(),
                     entity,
                     *userData.time );
                     userData.time );
            }
      };
      
+4 −4
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ update( const RealType& time,
                                           typename MeshFunction::DeviceType,
                                           typename MeshFunction::IndexType > >::value != true,
      "Error: I am getting tnlVector instead of tnlMeshFunction or similar object. You might forget to bind DofVector into tnlMeshFunction in you method getExplicitRHS."  );
   if( std::is_same< DeviceType, tnlHost >::value )
   //if( std::is_same< DeviceType, tnlHost >::value )
   {
      TraverserUserData userData( time, differentialOperator, boundaryConditions, rightHandSide, u, fu );
      tnlTraverser< MeshType, EntityType > meshTraverser;
@@ -60,7 +60,7 @@ update( const RealType& time,
                                                      userData );

   }
   if( std::is_same< DeviceType, tnlCuda >::value )
   /*if( std::is_same< DeviceType, tnlCuda >::value )
   {
      if( this->gpuTransferTimer ) 
         this->gpuTransferTimer->start();
@@ -73,7 +73,7 @@ update( const RealType& time,
     if( this->gpuTransferTimer ) 
         this->gpuTransferTimer->stop();

      TraverserUserData userData( *kernelTime, *kernelDifferentialOperator, *kernelBoundaryConditions, *kernelRightHandSide, *kernelU, *kernelFu );
      //TraverserUserData userData( *kernelTime, *kernelDifferentialOperator, *kernelBoundaryConditions, *kernelRightHandSide, *kernelU, *kernelFu );
      checkCudaDevice;
      tnlTraverser< MeshType, EntityType > meshTraverser;
      meshTraverser.template processBoundaryEntities< TraverserUserData,
@@ -100,7 +100,7 @@ update( const RealType& time,
      if( this->gpuTransferTimer ) 
         this->gpuTransferTimer->stop();

   }
   }*/
}

#endif /* TNLEXPLICITUPDATER_IMPL_H_ */
+7 −5
Original line number Diff line number Diff line
@@ -220,7 +220,9 @@ __global__ void heatEquationKernel( const Real* u,
                                    const Real hx_inv,
                                    const Real hy_inv,
                                    const Index gridXSize,
                                    const Index gridYSize )
                                    const Index gridYSize,
                                    Data d1,
                                    Data d2 )
{
   const Index i = blockIdx.x * blockDim.x + threadIdx.x;
   const Index j = blockIdx.y * blockDim.y + threadIdx.y;
@@ -379,8 +381,8 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters )
      typedef tnlGrid< 2, Real, tnlCuda, int > Grid;
      Grid g;
      Grid* kernelGrid = tnlCuda::passToDevice( g );*/
      Data d;
      Data* kernelD = tnlCuda::passToDevice( d );
      Data d, d2;
      //Data* kernelD = tnlCuda::passToDevice( d );

      /****
       * Neumann boundary conditions
@@ -398,7 +400,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters )
       */
      //cout << "Laplace operator ... " << endl;
      heatEquationKernel<<< cudaGridSize, cudaBlockSize >>>
         ( cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize );
         ( cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize, d, d2 );
      if( cudaGetLastError() != cudaSuccess )
      {
         cerr << "Laplace operator failed." << endl;
@@ -436,7 +438,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters )
      
      
      
      tnlCuda::freeFromDevice( kernelD );
      //tnlCuda::freeFromDevice( kernelD );
      /*tnlCuda::freeFromDevice( kernelTau );
      tnlCuda::freeFromDevice( kernelC1 );
      tnlCuda::freeFromDevice( kernelC2 );