Loading CMakeLists.txt +8 −1 Original line number Diff line number Diff line Loading @@ -186,7 +186,14 @@ endif() # Gbenchmark install # if( ${BUILD_BENCHMARKS} ) include( BuildGBenchmark ) #include( BuildGBenchmark ) find_package(PkgConfig) pkg_check_modules( BENCHMARK benchmark ) if( BENCHMARK_FOUND ) #message( "${BENCHMARK_INCLUDE_DIRS} ${BENCHMARK_LIBRARIES}" ) set( CXX_BENCHMARKS_FLAGS ${CXX_BENCHMARKS_FLAGS} -DHAVE_BENCHMARK -I${BENCHMARK_INCLUDE_DIRS} ) set( BENCHMARK_LIBS ${BENCHMARK_LIBRARIES}) endif() endif() Loading cmake/BuildGBenchmark.cmake +3 −2 Original line number Diff line number Diff line Loading @@ -9,13 +9,14 @@ # compatibility with the GTest package set( GTEST_BOTH_LIBRARIES gtest gtest_main ) #set( GTEST_BOTH_LIBRARIES gtest gtest_main ) set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_GBENCHMARK ) # Download and unpack googletest at configure time # -DBENCHMARK_ENABLE_GTEST_TESTS=OFF configure_file(cmake/Gbenchmark.cmake.in googlebenchmark-download/CMakeLists.txt) execute_process(COMMAND ${CMAKE_COMMAND} -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -G "${CMAKE_GENERATOR}" . execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" "-DCMAKE_CROSSCOMPILING=1 -DRUN_HAVE_STD_REGEX=0 -DRUN_HAVE_POSIX_REGEX=0" . RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googlebenchmark-download ) if(result) Loading cmake/Gbenchmark.cmake.in +1 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,7 @@ ExternalProject_Add(googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git #GIT_TAG master # build from a stable branch instead of master (which gets broken pretty often) GIT_TAG v1.5.3 #GIT_TAG v1.5.3 SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-src" BINARY_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-build" CONFIGURE_COMMAND "" Loading src/Benchmarks/SpMV/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -12,11 +12,12 @@ if( BUILD_CUDA ) cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} ) message( STATUS ${CXX_BENCHMARKS_FLAGS} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${BENCHMARK_LIBS} ) else() ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp ) target_compile_options( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_FLAGS} ) target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${BENCHMARK_LIBS} ) endif() install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin ) src/Benchmarks/SpMV/spmv.h +99 −4 Original line number Diff line number Diff line Loading @@ -16,6 +16,10 @@ #include <cstdint> #ifdef HAVE_BENCHMARK #include <benchmark/benchmark.h> #endif #include "../Benchmarks.h" #include "SpmvBenchmarkResult.h" Loading @@ -39,8 +43,8 @@ #include <TNL/Algorithms/Segments/BiEllpack.h> // Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation #define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES #define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES #define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS // Uncomment the following line to enable benchmarking the sandbox sparse matrix. Loading Loading @@ -182,6 +186,84 @@ using SparseMatrixLegacy_CSR_LightWithoutAtomic = Benchmarks::SpMV::ReferenceFor template< typename Real, typename Device, typename Index > using SlicedEllpackAlias = Benchmarks::SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >; template< typename Real > static void Benchmark_Cusparse( benchmark::State& state, const TNL::CusparseCSR< Real >& cusparseMatrix, const Containers::Vector< Real, Devices::Cuda, int > inVector, Containers::Vector< Real, Devices::Cuda, int >& outVector ) { cusparseMatrix.vectorProduct( inVector, outVector ); } TNL::String inputFileName_; template< typename Matrix > struct LegacyFixture : public benchmark::Fixture { using MatrixType = Matrix; using RealType = typename MatrixType::RealType; using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; using HostMatrixType = typename Matrix::template Self< RealType, TNL::Devices::Host, IndexType >; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; void SetUp(const ::benchmark::State& state) { /*if( std::is_same< DeviceType, TNL::Devices::Host >::value ) { SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< MatrixType >:: readMtxFile( inputFileName_, matrix ); } else*/ //{ HostMatrixType hostMatrix; SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrixType >:: readMtxFile( inputFileName_, hostMatrix ); matrix = hostMatrix; //} inVector.setSize( matrix.getColumns() ); outVector.setSize( matrix.getRows() ); inVector = 1.0; outVector = 0.0; } Matrix matrix; VectorType inVector, outVector; }; BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_float_Host, SparseMatrixLegacy_CSR_Scalar< float, TNL::Devices::Host, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_double_Host, SparseMatrixLegacy_CSR_Scalar< double, TNL::Devices::Host, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_float_Cuda, SparseMatrixLegacy_CSR_Scalar< float, TNL::Devices::Cuda, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_double_Cuda, SparseMatrixLegacy_CSR_Scalar< double, TNL::Devices::Cuda, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } /*template< typename Matrix, typename Real = typename Matrix::RealType, typename Device = typename Matrix::DeviceType > static void Benchmark_LegacyMatrix( benchmark::State& state, const Matrix& matrix, const Containers::Vector< Real, Device, int > inVector, const Containers::Vector< Real, Device, int > outVector ) { cusparseMatrix.vectorProduct( inVector, outVector ); }*/ // Get the name (with extension) of input matrix file std::string getMatrixFileName( const String& InputFileName ) { Loading Loading @@ -272,7 +354,12 @@ benchmarkSpMVLegacy( Benchmark& benchmark, }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); //benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); #ifdef HAVE_BENCHMARK inputFileName_ = inputFileName; //BENCHMARK_TEMPLATE_F() //BENCHMARK_CAPTURE( Benchmark_LegacyMatrix, hostMatrix, hostInVector, hostOutVector ); #endif ///// // Benchmark SpMV on CUDA Loading Loading @@ -470,7 +557,11 @@ benchmarkSpmv( Benchmark& benchmark, }; SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); //benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); #ifdef HAVE_BENCHMARK BENCHMARK( Benchmark_Cusparse, cusparseMatrix, cudaInVector, cudaOutVector ); #endif #ifdef HAVE_CSR5 //// Loading Loading @@ -609,6 +700,10 @@ benchmarkSpmv( Benchmark& benchmark, benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR ); } #endif //::benchmark::Initialize(&argc, argv); //if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return; ::benchmark::RunSpecifiedBenchmarks(); } } // namespace SpMVLegacy Loading Loading
CMakeLists.txt +8 −1 Original line number Diff line number Diff line Loading @@ -186,7 +186,14 @@ endif() # Gbenchmark install # if( ${BUILD_BENCHMARKS} ) include( BuildGBenchmark ) #include( BuildGBenchmark ) find_package(PkgConfig) pkg_check_modules( BENCHMARK benchmark ) if( BENCHMARK_FOUND ) #message( "${BENCHMARK_INCLUDE_DIRS} ${BENCHMARK_LIBRARIES}" ) set( CXX_BENCHMARKS_FLAGS ${CXX_BENCHMARKS_FLAGS} -DHAVE_BENCHMARK -I${BENCHMARK_INCLUDE_DIRS} ) set( BENCHMARK_LIBS ${BENCHMARK_LIBRARIES}) endif() endif() Loading
cmake/BuildGBenchmark.cmake +3 −2 Original line number Diff line number Diff line Loading @@ -9,13 +9,14 @@ # compatibility with the GTest package set( GTEST_BOTH_LIBRARIES gtest gtest_main ) #set( GTEST_BOTH_LIBRARIES gtest gtest_main ) set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_GBENCHMARK ) # Download and unpack googletest at configure time # -DBENCHMARK_ENABLE_GTEST_TESTS=OFF configure_file(cmake/Gbenchmark.cmake.in googlebenchmark-download/CMakeLists.txt) execute_process(COMMAND ${CMAKE_COMMAND} -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -G "${CMAKE_GENERATOR}" . execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" "-DCMAKE_CROSSCOMPILING=1 -DRUN_HAVE_STD_REGEX=0 -DRUN_HAVE_POSIX_REGEX=0" . RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googlebenchmark-download ) if(result) Loading
cmake/Gbenchmark.cmake.in +1 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,7 @@ ExternalProject_Add(googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git #GIT_TAG master # build from a stable branch instead of master (which gets broken pretty often) GIT_TAG v1.5.3 #GIT_TAG v1.5.3 SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-src" BINARY_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-build" CONFIGURE_COMMAND "" Loading
src/Benchmarks/SpMV/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -12,11 +12,12 @@ if( BUILD_CUDA ) cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} ) message( STATUS ${CXX_BENCHMARKS_FLAGS} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${BENCHMARK_LIBS} ) else() ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp ) target_compile_options( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_FLAGS} ) target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${BENCHMARK_LIBS} ) endif() install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )
src/Benchmarks/SpMV/spmv.h +99 −4 Original line number Diff line number Diff line Loading @@ -16,6 +16,10 @@ #include <cstdint> #ifdef HAVE_BENCHMARK #include <benchmark/benchmark.h> #endif #include "../Benchmarks.h" #include "SpmvBenchmarkResult.h" Loading @@ -39,8 +43,8 @@ #include <TNL/Algorithms/Segments/BiEllpack.h> // Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation #define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES #define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES #define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS // Uncomment the following line to enable benchmarking the sandbox sparse matrix. Loading Loading @@ -182,6 +186,84 @@ using SparseMatrixLegacy_CSR_LightWithoutAtomic = Benchmarks::SpMV::ReferenceFor template< typename Real, typename Device, typename Index > using SlicedEllpackAlias = Benchmarks::SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >; template< typename Real > static void Benchmark_Cusparse( benchmark::State& state, const TNL::CusparseCSR< Real >& cusparseMatrix, const Containers::Vector< Real, Devices::Cuda, int > inVector, Containers::Vector< Real, Devices::Cuda, int >& outVector ) { cusparseMatrix.vectorProduct( inVector, outVector ); } TNL::String inputFileName_; template< typename Matrix > struct LegacyFixture : public benchmark::Fixture { using MatrixType = Matrix; using RealType = typename MatrixType::RealType; using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; using HostMatrixType = typename Matrix::template Self< RealType, TNL::Devices::Host, IndexType >; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; void SetUp(const ::benchmark::State& state) { /*if( std::is_same< DeviceType, TNL::Devices::Host >::value ) { SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< MatrixType >:: readMtxFile( inputFileName_, matrix ); } else*/ //{ HostMatrixType hostMatrix; SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrixType >:: readMtxFile( inputFileName_, hostMatrix ); matrix = hostMatrix; //} inVector.setSize( matrix.getColumns() ); outVector.setSize( matrix.getRows() ); inVector = 1.0; outVector = 0.0; } Matrix matrix; VectorType inVector, outVector; }; BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_float_Host, SparseMatrixLegacy_CSR_Scalar< float, TNL::Devices::Host, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_double_Host, SparseMatrixLegacy_CSR_Scalar< double, TNL::Devices::Host, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_float_Cuda, SparseMatrixLegacy_CSR_Scalar< float, TNL::Devices::Cuda, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_double_Cuda, SparseMatrixLegacy_CSR_Scalar< double, TNL::Devices::Cuda, int > )( benchmark::State& state ) { matrix.vectorProduct( inVector, outVector ); } /*template< typename Matrix, typename Real = typename Matrix::RealType, typename Device = typename Matrix::DeviceType > static void Benchmark_LegacyMatrix( benchmark::State& state, const Matrix& matrix, const Containers::Vector< Real, Device, int > inVector, const Containers::Vector< Real, Device, int > outVector ) { cusparseMatrix.vectorProduct( inVector, outVector ); }*/ // Get the name (with extension) of input matrix file std::string getMatrixFileName( const String& InputFileName ) { Loading Loading @@ -272,7 +354,12 @@ benchmarkSpMVLegacy( Benchmark& benchmark, }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); //benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); #ifdef HAVE_BENCHMARK inputFileName_ = inputFileName; //BENCHMARK_TEMPLATE_F() //BENCHMARK_CAPTURE( Benchmark_LegacyMatrix, hostMatrix, hostInVector, hostOutVector ); #endif ///// // Benchmark SpMV on CUDA Loading Loading @@ -470,7 +557,11 @@ benchmarkSpmv( Benchmark& benchmark, }; SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); //benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); #ifdef HAVE_BENCHMARK BENCHMARK( Benchmark_Cusparse, cusparseMatrix, cudaInVector, cudaOutVector ); #endif #ifdef HAVE_CSR5 //// Loading Loading @@ -609,6 +700,10 @@ benchmarkSpmv( Benchmark& benchmark, benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR ); } #endif //::benchmark::Initialize(&argc, argv); //if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return; ::benchmark::RunSpecifiedBenchmarks(); } } // namespace SpMVLegacy Loading