Loading CMakeLists.txt +23 −23 Original line number Diff line number Diff line Loading @@ -318,29 +318,29 @@ endif( WITH_TESTS STREQUAL "yes" ) #### # Explicit template instantiation # if( WITH_TEMPLATE_INSTANTIATION STREQUAL "yes" ) AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION " ) if( INSTANTIATE_INT STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_INT " ) endif() if( INSTANTIATE_LONG_INT STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_LONG_INT " ) endif() if( INSTANTIATE_FLOAT STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_FLOAT " ) endif() if( INSTANTIATE_DOUBLE STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_DOUBLE " ) endif() if( INSTANTIATE_LONG_DOUBLE STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_LONG_DOUBLE " ) endif() endif() #if( WITH_TEMPLATE_INSTANTIATION STREQUAL "yes" ) # AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION " ) # # if( INSTANTIATE_INT STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_INT " ) # endif() # # if( INSTANTIATE_LONG_INT STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_LONG_INT " ) # endif() # # if( INSTANTIATE_FLOAT STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_FLOAT " ) # endif() # # if( INSTANTIATE_DOUBLE STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_DOUBLE " ) # endif() # # if( INSTANTIATE_LONG_DOUBLE STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_LONG_DOUBLE " ) # endif() #endif() set( CXX_TEST_FLAGS "-fprofile-arcs -ftest-coverage" ) set( LD_TEST_FLAGS "-lgcov -coverage" ) Loading build +1 −1 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ WITH_TESTS="yes" WITH_CUDA_ARCH="auto" WITH_CUBLAS="no" WITH_TEMPLATE_INSTANTIATION="yes" WITH_TEMPLATE_INSTANTIATION="no" INSTANTIATE_LONG_INT="no" INSTANTIATE_INT="yes" INSTANTIATE_LONG_DOUBLE="no" Loading tests/benchmarks/CMakeLists.txt +5 −13 Original line number Diff line number Diff line Loading @@ -13,31 +13,23 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-linear-solvers.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ELSE() ADD_EXECUTABLE( tnl-benchmark-spmv${debugExt} tnl-benchmark-spmv.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv${debugExt} tnl${debugExt}-${tnlVersion} ) ADD_EXECUTABLE( tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-linear-solvers.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ) ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ENDIF() if( BUILD_CUDA ) INSTALL( TARGETS tnl-cuda-benchmarks${debugExt} INSTALL( TARGETS tnl-cuda-benchmarks${debugExt} RUNTIME DESTINATION bin ) endif() INSTALL( TARGETS tnl-benchmark-spmv${debugExt} INSTALL( TARGETS tnl-benchmark-spmv${debugExt} tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-simple-heat-equation${debugExt} RUNTIME DESTINATION bin ) Loading tests/benchmarks/heat-equation-benchmark/CMakeLists.txt +9 −2 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-heat-equation${debugExt} tnl-benchmark-heat-equation.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ELSE() ADD_EXECUTABLE( tnl-benchmark-heat-equation${debugExt} tnl-benchmark-heat-equation.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ENDIF() INSTALL( TARGETS tnl-benchmark-heat-equation${debugExt} INSTALL( TARGETS tnl-benchmark-heat-equation${debugExt} tnl-benchmark-simple-heat-equation${debugExt} RUNTIME DESTINATION bin ) Loading tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h 0 → 100644 +139 −0 Original line number Diff line number Diff line /*************************************************************************** pure-c-rhs.h - description ------------------- begin : Apr 2, 2016 copyright : (C) 2016 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef PURE_C_RHS_H #define PURE_C_RHS_H #include<cuda.h> /**** * Just testing data for measuring performance * with different ways of passing data to kernels. */ struct Data { double time, tau; tnlStaticVector< 2, double > c1, c2, c3, c4; tnlGrid< 2, double > grid; }; #ifdef HAVE_CUDA template< typename Real, typename Index > __global__ void boundaryConditionsKernel( const Real* u, Real* aux, const Index gridXSize, const Index gridYSize ) { const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x; const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y; if( i == 0 && j < gridYSize ) aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; if( i == gridXSize - 1 && j < gridYSize ) aux[ j * gridXSize + gridXSize - 2 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; if( j == 0 && i < gridXSize ) aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; if( j == gridYSize -1 && i < gridXSize ) aux[ j * gridXSize + gridXSize - 2 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } template< typename Real, typename Index > __global__ void heatEquationKernel( const Real* u, Real* aux, const Real tau, const Real hx_inv, const Real hy_inv, const Index gridXSize, const Index gridYSize, Data d1, Data d2 ) { const Index i = blockIdx.x * blockDim.x + threadIdx.x; const Index j = blockIdx.y * blockDim.y + threadIdx.y; if( i > 0 && i < gridXSize - 1 && j > 0 && j < gridYSize - 1 ) { const Index c = j * gridXSize + i; aux[ c ] = tau * ( ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv + ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv ); } } template< typename RealType > bool pureCRhsCuda( dim3 cudaGridSize, dim3 cudaBlockSize, RealType* cuda_u, RealType* cuda_aux, const RealType& tau, const RealType& hx_inv, const RealType& hy_inv, int gridXSize, int gridYSize ) { /*Real* kernelTime = tnlCuda::passToDevice( time ); Real* kernelTau = tnlCuda::passToDevice( tau ); typedef tnlStaticVector< 2, Real > Coordinates; Coordinates c; Coordinates* kernelC1 = tnlCuda::passToDevice( c ); Coordinates* kernelC2 = tnlCuda::passToDevice( c ); Coordinates* kernelC3 = tnlCuda::passToDevice( c ); Coordinates* kernelC4 = tnlCuda::passToDevice( c ); typedef tnlGrid< 2, Real, tnlCuda, int > Grid; Grid g; Grid* kernelGrid = tnlCuda::passToDevice( g );*/ Data d, d2; //Data* kernelD = tnlCuda::passToDevice( d ); int cudaErr; /**** * Neumann boundary conditions */ //cout << "Setting boundary conditions ... " << endl; boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( cuda_u, cuda_aux, gridXSize, gridYSize ); if( ( cudaErr = cudaGetLastError() ) != cudaSuccess ) { cerr << "Setting of boundary conditions failed. " << cudaErr << endl; return false; } /**** * Laplace operator */ //cout << "Laplace operator ... " << endl; heatEquationKernel<<< cudaGridSize, cudaBlockSize >>> ( cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize, d, d2 ); if( cudaGetLastError() != cudaSuccess ) { cerr << "Laplace operator failed." << endl; return false; } //tnlCuda::freeFromDevice( kernelD ); /*tnlCuda::freeFromDevice( kernelTau ); tnlCuda::freeFromDevice( kernelC1 ); tnlCuda::freeFromDevice( kernelC2 ); tnlCuda::freeFromDevice( kernelC3 ); tnlCuda::freeFromDevice( kernelC4 ); tnlCuda::freeFromDevice( kernelGrid );*/ return true; } #endif #endif /* PURE_C_RHS_H */ Loading
CMakeLists.txt +23 −23 Original line number Diff line number Diff line Loading @@ -318,29 +318,29 @@ endif( WITH_TESTS STREQUAL "yes" ) #### # Explicit template instantiation # if( WITH_TEMPLATE_INSTANTIATION STREQUAL "yes" ) AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION " ) if( INSTANTIATE_INT STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_INT " ) endif() if( INSTANTIATE_LONG_INT STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_LONG_INT " ) endif() if( INSTANTIATE_FLOAT STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_FLOAT " ) endif() if( INSTANTIATE_DOUBLE STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_DOUBLE " ) endif() if( INSTANTIATE_LONG_DOUBLE STREQUAL "yes" ) AddCompilerFlag( "-DINSTANTIATE_LONG_DOUBLE " ) endif() endif() #if( WITH_TEMPLATE_INSTANTIATION STREQUAL "yes" ) # AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION " ) # # if( INSTANTIATE_INT STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_INT " ) # endif() # # if( INSTANTIATE_LONG_INT STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_LONG_INT " ) # endif() # # if( INSTANTIATE_FLOAT STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_FLOAT " ) # endif() # # if( INSTANTIATE_DOUBLE STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_DOUBLE " ) # endif() # # if( INSTANTIATE_LONG_DOUBLE STREQUAL "yes" ) # AddCompilerFlag( "-DINSTANTIATE_LONG_DOUBLE " ) # endif() #endif() set( CXX_TEST_FLAGS "-fprofile-arcs -ftest-coverage" ) set( LD_TEST_FLAGS "-lgcov -coverage" ) Loading
build +1 −1 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ WITH_TESTS="yes" WITH_CUDA_ARCH="auto" WITH_CUBLAS="no" WITH_TEMPLATE_INSTANTIATION="yes" WITH_TEMPLATE_INSTANTIATION="no" INSTANTIATE_LONG_INT="no" INSTANTIATE_INT="yes" INSTANTIATE_LONG_DOUBLE="no" Loading
tests/benchmarks/CMakeLists.txt +5 −13 Original line number Diff line number Diff line Loading @@ -13,31 +13,23 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-linear-solvers.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ELSE() ADD_EXECUTABLE( tnl-benchmark-spmv${debugExt} tnl-benchmark-spmv.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv${debugExt} tnl${debugExt}-${tnlVersion} ) ADD_EXECUTABLE( tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-linear-solvers.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ) ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ENDIF() if( BUILD_CUDA ) INSTALL( TARGETS tnl-cuda-benchmarks${debugExt} INSTALL( TARGETS tnl-cuda-benchmarks${debugExt} RUNTIME DESTINATION bin ) endif() INSTALL( TARGETS tnl-benchmark-spmv${debugExt} INSTALL( TARGETS tnl-benchmark-spmv${debugExt} tnl-benchmark-linear-solvers${debugExt} tnl-benchmark-simple-heat-equation${debugExt} RUNTIME DESTINATION bin ) Loading
tests/benchmarks/heat-equation-benchmark/CMakeLists.txt +9 −2 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-heat-equation${debugExt} tnl-benchmark-heat-equation.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ELSE() ADD_EXECUTABLE( tnl-benchmark-heat-equation${debugExt} tnl-benchmark-heat-equation.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ADD_EXECUTABLE( tnl-benchmark-simple-heat-equation${debugExt} tnl-benchmark-simple-heat-equation.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-simple-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} ) ENDIF() INSTALL( TARGETS tnl-benchmark-heat-equation${debugExt} INSTALL( TARGETS tnl-benchmark-heat-equation${debugExt} tnl-benchmark-simple-heat-equation${debugExt} RUNTIME DESTINATION bin ) Loading
tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h 0 → 100644 +139 −0 Original line number Diff line number Diff line /*************************************************************************** pure-c-rhs.h - description ------------------- begin : Apr 2, 2016 copyright : (C) 2016 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef PURE_C_RHS_H #define PURE_C_RHS_H #include<cuda.h> /**** * Just testing data for measuring performance * with different ways of passing data to kernels. */ struct Data { double time, tau; tnlStaticVector< 2, double > c1, c2, c3, c4; tnlGrid< 2, double > grid; }; #ifdef HAVE_CUDA template< typename Real, typename Index > __global__ void boundaryConditionsKernel( const Real* u, Real* aux, const Index gridXSize, const Index gridYSize ) { const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x; const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y; if( i == 0 && j < gridYSize ) aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; if( i == gridXSize - 1 && j < gridYSize ) aux[ j * gridXSize + gridXSize - 2 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; if( j == 0 && i < gridXSize ) aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; if( j == gridYSize -1 && i < gridXSize ) aux[ j * gridXSize + gridXSize - 2 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } template< typename Real, typename Index > __global__ void heatEquationKernel( const Real* u, Real* aux, const Real tau, const Real hx_inv, const Real hy_inv, const Index gridXSize, const Index gridYSize, Data d1, Data d2 ) { const Index i = blockIdx.x * blockDim.x + threadIdx.x; const Index j = blockIdx.y * blockDim.y + threadIdx.y; if( i > 0 && i < gridXSize - 1 && j > 0 && j < gridYSize - 1 ) { const Index c = j * gridXSize + i; aux[ c ] = tau * ( ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv + ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv ); } } template< typename RealType > bool pureCRhsCuda( dim3 cudaGridSize, dim3 cudaBlockSize, RealType* cuda_u, RealType* cuda_aux, const RealType& tau, const RealType& hx_inv, const RealType& hy_inv, int gridXSize, int gridYSize ) { /*Real* kernelTime = tnlCuda::passToDevice( time ); Real* kernelTau = tnlCuda::passToDevice( tau ); typedef tnlStaticVector< 2, Real > Coordinates; Coordinates c; Coordinates* kernelC1 = tnlCuda::passToDevice( c ); Coordinates* kernelC2 = tnlCuda::passToDevice( c ); Coordinates* kernelC3 = tnlCuda::passToDevice( c ); Coordinates* kernelC4 = tnlCuda::passToDevice( c ); typedef tnlGrid< 2, Real, tnlCuda, int > Grid; Grid g; Grid* kernelGrid = tnlCuda::passToDevice( g );*/ Data d, d2; //Data* kernelD = tnlCuda::passToDevice( d ); int cudaErr; /**** * Neumann boundary conditions */ //cout << "Setting boundary conditions ... " << endl; boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( cuda_u, cuda_aux, gridXSize, gridYSize ); if( ( cudaErr = cudaGetLastError() ) != cudaSuccess ) { cerr << "Setting of boundary conditions failed. " << cudaErr << endl; return false; } /**** * Laplace operator */ //cout << "Laplace operator ... " << endl; heatEquationKernel<<< cudaGridSize, cudaBlockSize >>> ( cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize, d, d2 ); if( cudaGetLastError() != cudaSuccess ) { cerr << "Laplace operator failed." << endl; return false; } //tnlCuda::freeFromDevice( kernelD ); /*tnlCuda::freeFromDevice( kernelTau ); tnlCuda::freeFromDevice( kernelC1 ); tnlCuda::freeFromDevice( kernelC2 ); tnlCuda::freeFromDevice( kernelC3 ); tnlCuda::freeFromDevice( kernelC4 ); tnlCuda::freeFromDevice( kernelGrid );*/ return true; } #endif #endif /* PURE_C_RHS_H */