Commit c1f16827 authored by Tomáš Oberhuber
Browse files

Refactoring heat-equation benchmark.

parent 30c87144
Loading
Loading
Loading
Loading
+23 −23
Original line number Diff line number Diff line
@@ -318,29 +318,29 @@ endif( WITH_TESTS STREQUAL "yes" )
####
# Explicit template instantiation
#
if( WITH_TEMPLATE_INSTANTIATION STREQUAL "yes" )
   AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION " )

   if( INSTANTIATE_INT STREQUAL "yes" )
      AddCompilerFlag( "-DINSTANTIATE_INT " )
   endif()

   if( INSTANTIATE_LONG_INT STREQUAL "yes" )
      AddCompilerFlag( "-DINSTANTIATE_LONG_INT " )
   endif()

   if( INSTANTIATE_FLOAT STREQUAL "yes" )
      AddCompilerFlag( "-DINSTANTIATE_FLOAT " )
   endif()

   if( INSTANTIATE_DOUBLE STREQUAL "yes" )
      AddCompilerFlag( "-DINSTANTIATE_DOUBLE " )
   endif()

   if( INSTANTIATE_LONG_DOUBLE STREQUAL "yes" )
      AddCompilerFlag( "-DINSTANTIATE_LONG_DOUBLE " )
   endif()
endif()
#if( WITH_TEMPLATE_INSTANTIATION STREQUAL "yes" )
#   AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION " )
#
#   if( INSTANTIATE_INT STREQUAL "yes" )
#      AddCompilerFlag( "-DINSTANTIATE_INT " )
#   endif()
#
#   if( INSTANTIATE_LONG_INT STREQUAL "yes" )
#      AddCompilerFlag( "-DINSTANTIATE_LONG_INT " )
#   endif()
#
#   if( INSTANTIATE_FLOAT STREQUAL "yes" )
#      AddCompilerFlag( "-DINSTANTIATE_FLOAT " )
#   endif()
#
#   if( INSTANTIATE_DOUBLE STREQUAL "yes" )
#      AddCompilerFlag( "-DINSTANTIATE_DOUBLE " )
#   endif()
#
#   if( INSTANTIATE_LONG_DOUBLE STREQUAL "yes" )
#      AddCompilerFlag( "-DINSTANTIATE_LONG_DOUBLE " )
#   endif()
#endif()

set( CXX_TEST_FLAGS "-fprofile-arcs -ftest-coverage" )
set( LD_TEST_FLAGS "-lgcov -coverage" )
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ WITH_TESTS="yes"

WITH_CUDA_ARCH="auto"
WITH_CUBLAS="no"
WITH_TEMPLATE_INSTANTIATION="yes"
WITH_TEMPLATE_INSTANTIATION="no"
INSTANTIATE_LONG_INT="no"
INSTANTIATE_INT="yes"
INSTANTIATE_LONG_DOUBLE="no"
+5 −13
Original line number Diff line number Diff line
@@ -13,31 +13,23 @@ IF( BUILD_CUDA )
    
    CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-linear-solvers.cu )
    TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} )                        

    CUDA_ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cu )
    TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )

ELSE()
    ADD_EXECUTABLE( tnl-benchmark-spmv${debugExt} tnl-benchmark-spmv.cpp )
    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv${debugExt} tnl${debugExt}-${tnlVersion} )

    ADD_EXECUTABLE( tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-linear-solvers.cpp )    
    TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} )

    ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cpp )    
    TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )
ENDIF()



if( BUILD_CUDA )                                                              
   INSTALL( TARGETS tnl-cuda-benchmarks${debugExt}
   INSTALL( TARGETS
                tnl-cuda-benchmarks${debugExt}
            RUNTIME DESTINATION bin )
endif()

INSTALL( TARGETS tnl-benchmark-spmv${debugExt}
INSTALL( TARGETS 
            tnl-benchmark-spmv${debugExt}
            tnl-benchmark-linear-solvers${debugExt}                 
                 tnl-benchmark-simple-heat-equation${debugExt}
         RUNTIME DESTINATION bin )


+9 −2
Original line number Diff line number Diff line
IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cu )
    TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )

    CUDA_ADD_EXECUTABLE( tnl-benchmark-heat-equation${debugExt} tnl-benchmark-heat-equation.cu )
    TARGET_LINK_LIBRARIES( tnl-benchmark-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )

ELSE()
    ADD_EXECUTABLE( tnl-benchmark-heat-equation${debugExt} tnl-benchmark-heat-equation.cpp )    
    TARGET_LINK_LIBRARIES( tnl-benchmark-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )

    ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cpp )    
    TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )
ENDIF()


INSTALL( TARGETS tnl-benchmark-heat-equation${debugExt}
INSTALL( TARGETS 
            tnl-benchmark-heat-equation${debugExt}
            tnl-benchmark-simple-heat-equation${debugExt}
         RUNTIME DESTINATION bin )


+139 −0
Original line number Diff line number Diff line
/***************************************************************************
                          pure-c-rhs.h  -  description
                             -------------------
    begin                : Apr 2, 2016
    copyright            : (C) 2016 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef PURE_C_RHS_H
#define	PURE_C_RHS_H

#include<cuda.h>

/****
 * Dummy payload with no computational meaning: it exists only so that the
 * benchmark can measure how different ways of handing data to kernels
 * affect performance.
 */
struct Data
{
   // Two scalar members.
   double time;
   double tau;

   // Four static 2D vectors.
   tnlStaticVector< 2, double > c1;
   tnlStaticVector< 2, double > c2;
   tnlStaticVector< 2, double > c3;
   tnlStaticVector< 2, double > c4;

   // One 2D grid object.
   tnlGrid< 2, double > grid;
};

#ifdef HAVE_CUDA

/****
 * Stores zero into every boundary entry of aux.
 *
 * aux is indexed as a gridXSize x gridYSize array laid out row by row,
 * i.e. node ( i, j ) lives at index j * gridXSize + i. A node is on the
 * boundary when i is 0 or gridXSize - 1, or j is 0 or gridYSize - 1.
 *
 * Launch layout: a 2D launch in which thread ( i, j ) is derived from the
 * x/y block and thread indices. Threads that fall outside the grid return
 * without touching memory, so the launch only has to cover at least
 * gridXSize x gridYSize threads.
 *
 * u          not read by this kernel; kept so that the signature stays
 *            unchanged. (A variant that copied neighbouring values of u
 *            instead of storing zero was left commented out in the
 *            original code.)
 * aux        array whose boundary entries are overwritten with zero
 * gridXSize  number of nodes along x
 * gridYSize  number of nodes along y
 *
 * The previous implementation wrote to column gridXSize - 2 for the right
 * edge and to a single fixed entry for each of the two horizontal edges,
 * which left most boundary entries of aux untouched.
 */
template< typename Real, typename Index >
__global__ void boundaryConditionsKernel( const Real* u, Real* aux,
                                          const Index gridXSize, const Index gridYSize )
{
   const Index i = blockIdx.x * blockDim.x + threadIdx.x;
   const Index j = blockIdx.y * blockDim.y + threadIdx.y;

   // Threads of partially filled blocks must not write anything.
   if( i >= gridXSize || j >= gridYSize )
      return;

   // Each boundary node is handled by exactly one thread, corners included.
   if( i == 0 || i == gridXSize - 1 ||
       j == 0 || j == gridYSize - 1 )
      aux[ j * gridXSize + i ] = Real( 0.0 );
}


/****
 * For every strictly interior node ( i, j ) of a gridXSize x gridYSize
 * array stored row by row, writes into aux the value
 *
 *    tau * ( ( second difference of u along x ) * hx_inv +
 *            ( second difference of u along y ) * hy_inv ).
 *
 * Boundary nodes and threads outside the grid are left untouched.
 *
 * Launch layout: 2D; thread ( i, j ) is derived from the x/y block and
 * thread indices.
 *
 * u              input values, read only
 * aux            output array, written at interior nodes only
 * tau            factor multiplying the whole stencil value
 * hx_inv, hy_inv factors multiplying the x and y second differences
 *                (presumably the inverse squared grid steps - TODO confirm
 *                against the caller)
 * gridXSize      number of nodes along x
 * gridYSize      number of nodes along y
 * d1, d2         not used in the body; they are only passed by value
 */
template< typename Real, typename Index >
__global__ void heatEquationKernel( const Real* u, 
                                    Real* aux,
                                    const Real tau,
                                    const Real hx_inv,
                                    const Real hy_inv,
                                    const Index gridXSize,
                                    const Index gridYSize,
                                    Data d1,
                                    Data d2 )
{
   const Index i = blockIdx.x * blockDim.x + threadIdx.x;
   const Index j = blockIdx.y * blockDim.y + threadIdx.y;

   // Anything that is not a strictly interior node leaves immediately.
   if( i <= 0 || i >= gridXSize - 1 ||
       j <= 0 || j >= gridYSize - 1 )
      return;

   // Linear indices of the node and of its four axis neighbours.
   const Index center = j * gridXSize + i;
   const Index xPrev = center - 1;
   const Index xNext = center + 1;
   const Index yPrev = center - gridXSize;
   const Index yNext = center + gridXSize;

   aux[ center ] = tau * ( ( u[ xPrev ] - 2.0 * u[ center ] + u[ xNext ] ) * hx_inv +
                           ( u[ yPrev ] - 2.0 * u[ center ] + u[ yNext ] ) * hy_inv );
}

/****
 * Launches boundaryConditionsKernel followed by heatEquationKernel with
 * the same launch configuration.
 *
 * cudaGridSize, cudaBlockSize  launch configuration used for both kernels
 * cuda_u, cuda_aux             pointers forwarded to the kernels
 *                              (presumably device memory - TODO confirm
 *                              against the caller)
 * tau, hx_inv, hy_inv          scalars forwarded to heatEquationKernel
 * gridXSize, gridYSize         grid dimensions forwarded to both kernels
 *
 * Returns true when both launches were accepted. Otherwise a message with
 * the CUDA error code and its description is written to cerr and false is
 * returned.
 *
 * Only cudaGetLastError() is queried after each launch and no
 * synchronization is performed here, so errors that occur while a kernel
 * is running are not detected by this function.
 */
template< typename RealType >
bool pureCRhsCuda( dim3 cudaGridSize,
                   dim3 cudaBlockSize,
                   RealType* cuda_u,
                   RealType* cuda_aux,
                   const RealType& tau,
                   const RealType& hx_inv,
                   const RealType& hy_inv,
                   int gridXSize,
                   int gridYSize )
{
   /****
    * Disabled experiment: passing every piece of data through a separate
    * device-side copy.
    */
   /*Real* kernelTime = tnlCuda::passToDevice( time );
   Real* kernelTau = tnlCuda::passToDevice( tau );
   typedef tnlStaticVector< 2, Real > Coordinates;
   Coordinates c;
   Coordinates* kernelC1 = tnlCuda::passToDevice( c );
   Coordinates* kernelC2 = tnlCuda::passToDevice( c );
   Coordinates* kernelC3 = tnlCuda::passToDevice( c );
   Coordinates* kernelC4 = tnlCuda::passToDevice( c );
   typedef tnlGrid< 2, Real, tnlCuda, int > Grid;
   Grid g;
   Grid* kernelGrid = tnlCuda::passToDevice( g );*/

   // Test payload handed to heatEquationKernel by value; the kernel does
   // not read it.
   Data d, d2;
   //Data* kernelD = tnlCuda::passToDevice( d );

   /****
    * Boundary conditions. This step was originally labelled
    * "Neumann boundary conditions".
    * NOTE(review): the kernel stores zeros into aux - confirm which kind
    * of boundary condition is intended.
    */
   //cout << "Setting boundary conditions ... " << endl;
   boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( cuda_u, cuda_aux, gridXSize, gridYSize );
   cudaError_t cudaErr = cudaGetLastError();
   if( cudaErr != cudaSuccess )
   {
      cerr << "Setting of boundary conditions failed. " << cudaErr
           << " (" << cudaGetErrorString( cudaErr ) << ")" << endl;
      return false;
   }

   /****
    * Laplace operator
    */
   //cout << "Laplace operator ... " << endl;
   heatEquationKernel<<< cudaGridSize, cudaBlockSize >>>
      ( cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize, d, d2 );
   cudaErr = cudaGetLastError();
   if( cudaErr != cudaSuccess )
   {
      // Report the code here as well; it used to be dropped silently.
      cerr << "Laplace operator failed. " << cudaErr
           << " (" << cudaGetErrorString( cudaErr ) << ")" << endl;
      return false;
   }

   //tnlCuda::freeFromDevice( kernelD );
   /*tnlCuda::freeFromDevice( kernelTau );
   tnlCuda::freeFromDevice( kernelC1 );
   tnlCuda::freeFromDevice( kernelC2 );
   tnlCuda::freeFromDevice( kernelC3 );
   tnlCuda::freeFromDevice( kernelC4 );
   tnlCuda::freeFromDevice( kernelGrid );*/

   return true;
}

#endif

#endif	/* PURE_C_RHS_H */
Loading