Commit f2b7e727 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Implementing CUDA benchmark.

parent 506ca03e
Loading
Loading
Loading
Loading
+84 −18
Original line number Diff line number Diff line
@@ -153,6 +153,59 @@ makeSnapshot( const RealType& time,
   return true;
}

#ifdef HAVE_CUDA

template< typename Real, typename Index >
__global__ void boundaryConditionsKernel( Real* u,
                                          Real* fu,
                                          const Index gridXSize, const Index gridYSize )
{
   const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x;
   const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y;
   if( i == 0 && j < gridYSize )
   {
      fu[ j * gridXSize ] = 0.0;
      u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ];
   }
   if( i == gridXSize - 1 && j < gridYSize )
   {
      fu[ j * gridXSize + gridYSize - 1 ] = 0.0;
      u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
   }
   if( j == 0 && i > 0 && i < gridXSize - 1 )
   {
      fu[ i ] = 0.0; //u[ j * gridXSize + 1 ];
      u[ i ] = 0.0; //u[ j * gridXSize + 1 ];
   }
   if( j == gridYSize -1  && i > 0 && i < gridXSize - 1 )
   {
      fu[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
      u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
   }         
}


template< typename Real, typename Index >
__global__ void heatEquationKernel( const Real* u, 
                                    Real* fu,
                                    const Real tau,
                                    const Real hx_inv,
                                    const Real hy_inv,
                                    const Index gridXSize,
                                    const Index gridYSize )
{
   const Index i = blockIdx.x * blockDim.x + threadIdx.x;
   const Index j = blockIdx.y * blockDim.y + threadIdx.y;
   if( i > 0 && i < gridXSize - 1 &&
       j > 0 && j < gridYSize - 1 )
   {
      const Index c = j * gridXSize + i;
      fu[ c ] = ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv +
                ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv;
   }
}
#endif

template< typename Mesh,
          typename BoundaryCondition,
          typename RightHandSide,
@@ -200,24 +253,37 @@ getExplicitRHS( const RealType& time,
                              ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv );
         }
   }
   if( std::is_same< DeviceType, tnlCuda >::value )
   {
      const IndexType gridXSize = mesh.getDimensions().x();
      const IndexType gridYSize = mesh.getDimensions().y();
      const RealType& hx_inv = mesh.template getSpaceStepsProducts< -2,  0 >();
      const RealType& hy_inv = mesh.template getSpaceStepsProducts<  0, -2 >();
      
      dim3 cudaBlockSize( 16, 16 );
      dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ),
                         gridYSize / 16 + ( gridYSize % 16 != 0 ) );
      
   /*this->bindDofs( mesh, _u );
   tnlExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater;
   MeshFunctionType u( mesh, _u ); 
   MeshFunctionType fu( mesh, _fu ); 
   explicitUpdater.template update< typename Mesh::Cell >( time,
                                                           mesh,
                                                           this->differentialOperator,
                                                           this->boundaryCondition,
                                                           this->rightHandSide,
                                                           u,
                                                           fu );
   tnlBoundaryConditionsSetter< MeshFunctionType, BoundaryCondition > boundaryConditionsSetter; 
   boundaryConditionsSetter.template apply< typename Mesh::Cell >( 
      this->boundaryCondition, 
      time + tau, 
       u ); */
      int cudaErr;
      boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( u.getData(), fu.getData(), gridXSize, gridYSize );
      if( ( cudaErr = cudaGetLastError() ) != cudaSuccess )
      {
         cerr << "Setting of boundary conditions failed. " << cudaErr << endl;
         return;
      }

      /****
       * Laplace operator
       */
      //cout << "Laplace operator ... " << endl;
      heatEquationKernel<<< cudaGridSize, cudaBlockSize >>>
         ( u.getData(), fu.getData(), tau, hx_inv, hy_inv, gridXSize, gridYSize );
      if( cudaGetLastError() != cudaSuccess )
      {
         cerr << "Laplace operator failed." << endl;
         return;
      }
   }
}

template< typename Mesh,
+16 −5
Original line number Diff line number Diff line
@@ -36,21 +36,32 @@ struct Data
#ifdef HAVE_CUDA

template< typename Real, typename Index >
__global__ void boundaryConditionsKernel( const Real* u, Real* aux,
__global__ void boundaryConditionsKernel( Real* u,
                                          Real* aux,
                                          const Index gridXSize, const Index gridYSize )
{
   const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x;
   const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y;
   if( i == 0 && j < gridYSize )
      aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ];
   {
      aux[ j * gridXSize ] = 0.0;
      u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ];
   }
   if( i == gridXSize - 1 && j < gridYSize )
      aux[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
   {
      aux[ j * gridXSize + gridYSize - 1 ] = 0.0;
      u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
   }
   if( j == 0 && i > 0 && i < gridXSize - 1 )
   {
      aux[ i ] = 0.0; //u[ j * gridXSize + 1 ];
      u[ i ] = 0.0; //u[ j * gridXSize + 1 ];
   }
   if( j == gridYSize -1  && i > 0 && i < gridXSize - 1 )
   {
      aux[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
   
    
      u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
   }         
}


+16 −11
Original line number Diff line number Diff line
@@ -222,7 +222,9 @@ bool writeFunction(
   const Index xSize,
   const Index ySize,
   const Real& hx,
   const Real& hy )
   const Real& hy,
   const Real& originX,
   const Real& originY )
{
   fstream file;
   file.open( fileName, ios::out );
@@ -234,7 +236,7 @@ bool writeFunction(
   for( Index i = 0; i < xSize; i++ )
   {
      for( Index j = 0; j < ySize; j++ )
         file << i * hx << " " << j * hy << " " << data[ j * xSize + i ] << endl;
         file << i * hx - originX << " " << j * hy - originY << " " << data[ j * xSize + i ] << endl;
      file << endl;
   }
}
@@ -308,8 +310,11 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
      cerr << "Allocation failed. " << cudaErr << endl;
      return false;
   }
   //cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
   //writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy );
   tnlVector< Real, tnlCuda, Index > vecAux;
   vecAux.bind( cuda_aux, gridXSize * gridYSize );
   vecAux.setValue( 0.0 );   
   cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
   writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
   
   /****
    * Explicit Euler solver
@@ -340,9 +345,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
         return false;
      computationTimer.stop();
      
      //cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
      //writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy );
      //getchar();
      /*cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
      writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
      getchar();*/
        
      updateTimer.start();
      /****
@@ -355,9 +360,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
         cerr << "Update failed." << endl;
         return false;
      }
      //cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
      //writeFunction( "u", aux, gridXSize, gridYSize, hx, hy );
      //getchar();
      /*cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
      writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
      getchar();*/
      
      
      cudaThreadSynchronize();
@@ -382,7 +387,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
   }
   timer.stop();
   cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
   writeFunction( "final", u, gridXSize, gridYSize, hx, hy );
   writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );

   /****
    * Saving the result