Loading tests/benchmarks/CMakeLists.txt +10 −1 Original line number Diff line number Diff line ADD_SUBDIRECTORY( share ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-cuda-benchmarks${debugExt} tnl-cuda-benchmarks.cu OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} ) SET_TARGET_PROPERTIES( tnl-cuda-benchmarks${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) TARGET_LINK_LIBRARIES( tnl-cuda-benchmarks${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv${debugExt} tnl-benchmark-spmv.cu OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} ) SET_TARGET_PROPERTIES( tnl-benchmark-spmv${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) Loading @@ -19,6 +23,11 @@ ENDIF() TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ) if( BUILD_CUDA ) INSTALL( TARGETS tnl-cuda-benchmarks${debugExt} RUNTIME DESTINATION bin ) endif() INSTALL( TARGETS tnl-benchmark-spmv${debugExt} tnl-benchmark-linear-solvers${debugExt} RUNTIME DESTINATION bin ) Loading tests/benchmarks/tnl-benchmarks.cpp→tests/benchmarks/tnl-cuda-benchmarks.cu +52 −0 Original line number Diff line number Diff line /*************************************************************************** tnl-benchmarks.cpp - description tnl-cuda-benchmarks.cu - description ------------------- begin : Nov 25, 2010 copyright : (C) 2010 by Tomas Oberhuber begin : May 28, 2015 copyright : (C) 2015 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ Loading @@ -15,42 +15,37 @@ * * ***************************************************************************/ #include <core/vectors/tnlVectorHost.h> #include <core/vectors/tnlVectorCUDA.h> #include <tnl-benchmarks.h> #include <core/vectors/tnlVector.h> #include <core/tnlTimerRT.h> int main( int argc, char* argv[] ) { #ifdef HAVE_CUDA tnlTimerRT timer; const double oneGB = 1024.0 * 1024.0 * 1024.0; cout << "Benchmarking memory bandwidth when transfering int ..." << endl; const int size = 1 << 22; double host_to_host_band_width; double host_to_device_band_width; double device_to_host_band_width; double device_to_device_band_width; transferBenchmark< int >( size, host_to_host_band_width, host_to_device_band_width, device_to_host_band_width, device_to_device_band_width ); tnlVector< int, tnlHost > hostVector; tnlVector< int, tnlCuda > deviceVector; hostVector.setSize( size ); deviceVector.setSize( size ); hostVector.setValue( 1.0 ); deviceVector.setValue( 0.0 ); timer.reset(); timer.start(); deviceVector = hostVector; timer.stop(); cout << "Benchmarking reduction of int ..." << endl; for( int i = 0; i <= 6; i ++ ) reductionBenchmark< int >( size, i ); double bandwidth = ( double ) ( size ) * sizeof( int ) / timer.getTime() / oneGB; cout << "Benchmarking reduction of float ..." << endl; for( int i = 0; i <= 6; i ++ ) reductionBenchmark< float >( size, i ); cout << bandwidth << " GB/sec." << endl; cout << "Benchmarking reduction of double ..." << endl; for( int i = 0; i <= 6; i ++ ) reductionBenchmark< double >( size / 2, i ); #endif return EXIT_SUCCESS; Loading tests/benchmarks/tnl-benchmarks.h→tests/benchmarks/tnl-cuda-benchmarks.h +0 −0 File moved. View file Loading
tests/benchmarks/CMakeLists.txt +10 −1 Original line number Diff line number Diff line ADD_SUBDIRECTORY( share ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-cuda-benchmarks${debugExt} tnl-cuda-benchmarks.cu OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} ) SET_TARGET_PROPERTIES( tnl-cuda-benchmarks${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) TARGET_LINK_LIBRARIES( tnl-cuda-benchmarks${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv${debugExt} tnl-benchmark-spmv.cu OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} ) SET_TARGET_PROPERTIES( tnl-benchmark-spmv${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) Loading @@ -19,6 +23,11 @@ ENDIF() TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers${debugExt} tnl${debugExt}-${tnlVersion} ) if( BUILD_CUDA ) INSTALL( TARGETS tnl-cuda-benchmarks${debugExt} RUNTIME DESTINATION bin ) endif() INSTALL( TARGETS tnl-benchmark-spmv${debugExt} tnl-benchmark-linear-solvers${debugExt} RUNTIME DESTINATION bin ) Loading
tests/benchmarks/tnl-benchmarks.cpp→tests/benchmarks/tnl-cuda-benchmarks.cu +52 −0 Original line number Diff line number Diff line /*************************************************************************** tnl-benchmarks.cpp - description tnl-cuda-benchmarks.cu - description ------------------- begin : Nov 25, 2010 copyright : (C) 2010 by Tomas Oberhuber begin : May 28, 2015 copyright : (C) 2015 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ Loading @@ -15,42 +15,37 @@ * * ***************************************************************************/ #include <core/vectors/tnlVectorHost.h> #include <core/vectors/tnlVectorCUDA.h> #include <tnl-benchmarks.h> #include <core/vectors/tnlVector.h> #include <core/tnlTimerRT.h> int main( int argc, char* argv[] ) { #ifdef HAVE_CUDA tnlTimerRT timer; const double oneGB = 1024.0 * 1024.0 * 1024.0; cout << "Benchmarking memory bandwidth when transfering int ..." << endl; const int size = 1 << 22; double host_to_host_band_width; double host_to_device_band_width; double device_to_host_band_width; double device_to_device_band_width; transferBenchmark< int >( size, host_to_host_band_width, host_to_device_band_width, device_to_host_band_width, device_to_device_band_width ); tnlVector< int, tnlHost > hostVector; tnlVector< int, tnlCuda > deviceVector; hostVector.setSize( size ); deviceVector.setSize( size ); hostVector.setValue( 1.0 ); deviceVector.setValue( 0.0 ); timer.reset(); timer.start(); deviceVector = hostVector; timer.stop(); cout << "Benchmarking reduction of int ..." << endl; for( int i = 0; i <= 6; i ++ ) reductionBenchmark< int >( size, i ); double bandwidth = ( double ) ( size ) * sizeof( int ) / timer.getTime() / oneGB; cout << "Benchmarking reduction of float ..." << endl; for( int i = 0; i <= 6; i ++ ) reductionBenchmark< float >( size, i ); cout << bandwidth << " GB/sec." << endl; cout << "Benchmarking reduction of double ..." << endl; for( int i = 0; i <= 6; i ++ ) reductionBenchmark< double >( size / 2, i ); #endif return EXIT_SUCCESS; Loading