Trying google benchmark. (2d4b2761) · Commits · TNL / tnl-dev

CMakeLists.txt

+8 −1

Original line number	Diff line number	Diff line
		@@ -186,7 +186,14 @@ endif()
		# Gbenchmark install
		#
		if( ${BUILD_BENCHMARKS} )
		   include( BuildGBenchmark )
		   #include( BuildGBenchmark )
		   # Look for an installed Google Benchmark through pkg-config. On success this
		   # fills BENCHMARK_FOUND, BENCHMARK_INCLUDE_DIRS and BENCHMARK_LIBRARIES.
		   find_package(PkgConfig)
		   pkg_check_modules( BENCHMARK benchmark )
		   if( BENCHMARK_FOUND )
		      #message( "${BENCHMARK_INCLUDE_DIRS} ${BENCHMARK_LIBRARIES}" )
		      # HAVE_BENCHMARK enables the Google Benchmark code paths in the benchmark sources.
		      set( CXX_BENCHMARKS_FLAGS ${CXX_BENCHMARKS_FLAGS} -DHAVE_BENCHMARK )
		      # BENCHMARK_INCLUDE_DIRS is a list that may be empty (headers in a default
		      # system path) or hold several entries. Emit one -I flag per directory so
		      # that no bare "-I" and no unprefixed directory reaches the compiler.
		      foreach( benchmark_include_dir IN LISTS BENCHMARK_INCLUDE_DIRS )
		         list( APPEND CXX_BENCHMARKS_FLAGS "-I${benchmark_include_dir}" )
		      endforeach()
		      # Libraries that the benchmark executables link against.
		      set( BENCHMARK_LIBS ${BENCHMARK_LIBRARIES})
		   endif()
		endif()

cmake/BuildGBenchmark.cmake

+3 −2

Original line number	Diff line number	Diff line
		@@ -9,13 +9,14 @@


		# compatibility with the GTest package
		set( GTEST_BOTH_LIBRARIES gtest gtest_main )
		#set( GTEST_BOTH_LIBRARIES gtest gtest_main )
		# Append the HAVE_GBENCHMARK preprocessor definition to the flags collected in CXX_TESTS_FLAGS.
		# NOTE(review): the benchmark sources test HAVE_BENCHMARK and the top-level CMakeLists.txt
		# fills CXX_BENCHMARKS_FLAGS -- confirm which macro and which variable are intended here.
		set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_GBENCHMARK )


		# Download and unpack Google Benchmark at configure time
		# -DBENCHMARK_ENABLE_GTEST_TESTS=OFF
		configure_file(cmake/Gbenchmark.cmake.in googlebenchmark-download/CMakeLists.txt)
		execute_process(COMMAND ${CMAKE_COMMAND} -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -G "${CMAKE_GENERATOR}" .
		# Configure the download project in its own build directory; a non-zero exit
		# status is stored in `result`.
		# Each -D definition must be a separate argument: execute_process() does not go
		# through a shell, so a single quoted string would reach CMake as one definition
		# whose value contains the remaining "-D..." text.
		execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
		                        -DCMAKE_CROSSCOMPILING=1
		                        -DRUN_HAVE_STD_REGEX=0
		                        -DRUN_HAVE_POSIX_REGEX=0
		                        .
		   RESULT_VARIABLE result
		   WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googlebenchmark-download )
		if(result)

cmake/Gbenchmark.cmake.in

+1 −1

Original line number	Diff line number	Diff line
		@@ -11,7 +11,7 @@ ExternalProject_Add(googlebenchmark
		GIT_REPOSITORY https://github.com/google/benchmark.git
		#GIT_TAG master
		# build from a stable branch instead of master (which gets broken pretty often)
		GIT_TAG v1.5.3
		#GIT_TAG v1.5.3
		SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-src"
		BINARY_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-build"
		CONFIGURE_COMMAND ""

src/Benchmarks/SpMV/CMakeLists.txt

+2 −1

Original line number	Diff line number	Diff line
		@@ -12,11 +12,12 @@ if( BUILD_CUDA )
		cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} )
		message( STATUS ${CXX_BENCHMARKS_FLAGS} )
		CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} )
		TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} )
		TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${BENCHMARK_LIBS} )
		else()
		ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
		target_compile_options( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_FLAGS} )
		target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} )
		TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${BENCHMARK_LIBS} )
		endif()

		install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )

src/Benchmarks/SpMV/spmv.h

+99 −4

Original line number	Diff line number	Diff line
		@@ -16,6 +16,10 @@

		#include <cstdint>

		#ifdef HAVE_BENCHMARK
		#include <benchmark/benchmark.h>
		#endif

		#include "../Benchmarks.h"
		#include "SpmvBenchmarkResult.h"

		@@ -39,8 +43,8 @@
		#include <TNL/Algorithms/Segments/BiEllpack.h>

		// Comment out the following defines to turn off some groups of SpMV benchmarks and speed up the compilation
		#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
		#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
		//#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
		//#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
		#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS

		// Uncomment the following line to enable benchmarking the sandbox sparse matrix.
		@@ -182,6 +186,84 @@ using SparseMatrixLegacy_CSR_LightWithoutAtomic = Benchmarks::SpMV::ReferenceFor
		template< typename Real, typename Device, typename Index >
		using SlicedEllpackAlias = Benchmarks::SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >;

		#ifdef HAVE_BENCHMARK
		// Google Benchmark body measuring cusparseMatrix.vectorProduct( inVector, outVector ).
		//
		// state          - benchmark state driving the measurement loop
		// cusparseMatrix - matrix whose vectorProduct() is measured
		// inVector       - input vector; taken by const reference to avoid copying it on every call
		// outVector      - output vector written by vectorProduct()
		//
		// Guarded by HAVE_BENCHMARK because <benchmark/benchmark.h>, which declares
		// benchmark::State, is included only when that macro is defined.
		template< typename Real >
		static void Benchmark_Cusparse( benchmark::State& state,
		                                const TNL::CusparseCSR< Real >& cusparseMatrix,
		                                const Containers::Vector< Real, Devices::Cuda, int >& inVector,
		                                Containers::Vector< Real, Devices::Cuda, int >& outVector )
		{
		   // The measured code must run inside the state loop; without it nothing is
		   // timed and the library reports that the benchmark returned too early.
		   // NOTE(review): if vectorProduct() launches asynchronous device work, a device
		   // synchronization inside the loop may be needed for meaningful timings -- confirm.
		   for( auto _ : state )
		      cusparseMatrix.vectorProduct( inVector, outVector );
		}
		#endif

		TNL::String inputFileName_;

		// Google Benchmark fixture owning a matrix of type Matrix together with the
		// input and output vectors used for a matrix-vector product.
		//
		// Matrix must provide the nested types RealType, DeviceType and IndexType and
		// the alias template Self.
		template< typename Matrix >
		struct LegacyFixture : public benchmark::Fixture
		{
		   using MatrixType = Matrix;
		   using RealType = typename MatrixType::RealType;
		   using DeviceType = typename MatrixType::DeviceType;
		   using IndexType = typename MatrixType::IndexType;
		   using HostMatrixType = typename Matrix::template Self< RealType, TNL::Devices::Host, IndexType >;
		   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;

		   // Reads the file named by the global inputFileName_ into a host matrix,
		   // assigns it to `matrix`, and then prepares both vectors: inVector gets
		   // matrix.getColumns() entries set to 1.0, outVector gets matrix.getRows()
		   // entries set to 0.0.
		   void SetUp(const ::benchmark::State& state)
		   {
		      using HostReader = SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrixType >;

		      // The file is always read into the host matrix type first; the result is
		      // then assigned to the benchmarked matrix type.
		      HostMatrixType loadedMatrix;
		      HostReader::readMtxFile( inputFileName_, loadedMatrix );
		      matrix = loadedMatrix;

		      inVector.setSize( matrix.getColumns() );
		      inVector = 1.0;

		      outVector.setSize( matrix.getRows() );
		      outVector = 0.0;
		   }

		   // Matrix under test, filled by SetUp().
		   Matrix matrix;

		   // Input and output vectors of the matrix-vector product.
		   VectorType inVector;
		   VectorType outVector;
		};

		// Measures matrix.vectorProduct() for the legacy CSR Scalar format with float on the host.
		BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_float_Host, SparseMatrixLegacy_CSR_Scalar< float, TNL::Devices::Host, int > )( benchmark::State& state )
		{
		   // The measured code must run inside the state loop, otherwise nothing is
		   // timed and the library reports that the benchmark returned too early.
		   for( auto _ : state )
		      matrix.vectorProduct( inVector, outVector );
		}

		// Measures matrix.vectorProduct() for the legacy CSR Scalar format with double on the host.
		BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_double_Host, SparseMatrixLegacy_CSR_Scalar< double, TNL::Devices::Host, int > )( benchmark::State& state )
		{
		   // The measured code must run inside the state loop, otherwise nothing is
		   // timed and the library reports that the benchmark returned too early.
		   for( auto _ : state )
		      matrix.vectorProduct( inVector, outVector );
		}

		// Measures matrix.vectorProduct() for the legacy CSR Scalar format with float on the CUDA device.
		BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_float_Cuda, SparseMatrixLegacy_CSR_Scalar< float, TNL::Devices::Cuda, int > )( benchmark::State& state )
		{
		   // The measured code must run inside the state loop, otherwise nothing is
		   // timed and the library reports that the benchmark returned too early.
		   // NOTE(review): if vectorProduct() launches asynchronous device work, a device
		   // synchronization inside the loop may be needed for meaningful timings -- confirm.
		   for( auto _ : state )
		      matrix.vectorProduct( inVector, outVector );
		}

		// Measures matrix.vectorProduct() for the legacy CSR Scalar format with double on the CUDA device.
		BENCHMARK_TEMPLATE_F( LegacyFixture, CSR_Scalar_double_Cuda, SparseMatrixLegacy_CSR_Scalar< double, TNL::Devices::Cuda, int > )( benchmark::State& state )
		{
		   // The measured code must run inside the state loop, otherwise nothing is
		   // timed and the library reports that the benchmark returned too early.
		   // NOTE(review): if vectorProduct() launches asynchronous device work, a device
		   // synchronization inside the loop may be needed for meaningful timings -- confirm.
		   for( auto _ : state )
		      matrix.vectorProduct( inVector, outVector );
		}


		/*template< typename Matrix,
		typename Real = typename Matrix::RealType,
		typename Device = typename Matrix::DeviceType >
		static void Benchmark_LegacyMatrix( benchmark::State& state,
		const Matrix& matrix,
		const Containers::Vector< Real, Device, int > inVector,
		const Containers::Vector< Real, Device, int > outVector )
		{
		cusparseMatrix.vectorProduct( inVector, outVector );
		}*/

		// Get the name (with extension) of input matrix file
		std::string getMatrixFileName( const String& InputFileName )
		{
		@@ -272,7 +354,12 @@ benchmarkSpMVLegacy( Benchmark& benchmark,

		};
		SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
		//benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
		#ifdef HAVE_BENCHMARK
		inputFileName_ = inputFileName;
		//BENCHMARK_TEMPLATE_F()
		//BENCHMARK_CAPTURE( Benchmark_LegacyMatrix, hostMatrix, hostInVector, hostOutVector );
		#endif

		/////
		// Benchmark SpMV on CUDA
		@@ -470,7 +557,11 @@ benchmarkSpmv( Benchmark& benchmark,
		};

		SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );
		//benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );

		#ifdef HAVE_BENCHMARK
		BENCHMARK( Benchmark_Cusparse, cusparseMatrix, cudaInVector, cudaOutVector );
		#endif

		#ifdef HAVE_CSR5
		////
		@@ -609,6 +700,10 @@ benchmarkSpmv( Benchmark& benchmark,
		benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
		}
		#endif

		//::benchmark::Initialize(&argc, argv);
		//if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return;
		::benchmark::RunSpecifiedBenchmarks();
		}

		} // namespace SpMVLegacy