Added LightSpMV Warp kernel benchmark. (92bb20c0) · Commits · TNL / tnl-dev

src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -126,6 +126,11 @@ struct LightSpMVBenchmark
		#endif
		}

		/**
		 * \brief Stores the requested LightSpMV kernel type in this benchmark object.
		 *
		 * \param type kernel variant to remember in \e kernelType.
		 *
		 * NOTE(review): presumably the stored value is consulted by vectorProduct()
		 * to choose the kernel to launch - confirm against its implementation.
		 */
		void setKernelType( LightSpMVBenchmarkKernelType type )
		{
		this->kernelType = type;
		}

		void resetVectors()
		{
		this->inVectorView = 1.0;

src/Benchmarks/SpMV/spmv.h

+21 −12

Original line number	Diff line number	Diff line
		@@ -39,9 +39,9 @@
		#include <TNL/Algorithms/Segments/BiEllpack.h>

		// Comment out the following to turn off some groups of SpMV benchmarks and speed up the compilation
		//#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
		//#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
		//#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
		#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
		#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
		#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS

		// Uncomment the following line to enable benchmarking the sandbox sparse matrix.
		//#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
		@@ -374,7 +374,7 @@ benchmarkSpMV( Benchmark& benchmark,
		template< typename Real = double,
		typename Index = int >
		void
		benchmarkSpmvSynthetic( Benchmark& benchmark,
		benchmarkSpmv( Benchmark& benchmark,
		const String& inputFileName,
		const Config::ParameterContainer& parameters,
		bool verboseMR )
		@@ -469,8 +469,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
		cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector );
		};

		SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
		SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );

		#ifdef HAVE_CSR5
		////
		@@ -489,7 +489,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
		auto csr5SpMV = [&]() {
		csr5Benchmark.vectorProduct();
		};
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults );
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults );
		std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
		csrCudaMatrix.reset();
		#endif
		@@ -501,7 +501,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "LightSpMV" ) }
		{ "matrix format", String( "LightSpMV Vector" ) }
		} ));

		LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix;
		@@ -514,7 +514,16 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
		auto spmvLightSpMV = [&]() {
		lightSpMVBenchmark.vectorProduct();
		};
		benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cusparseBenchmarkResults );
		benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );

		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "LightSpMV Warp" ) }
		} ));
		lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp );
		benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
		#endif
		csrHostMatrix.reset();

src/Benchmarks/SpMV/tnl-benchmark-spmv.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -45,7 +45,7 @@ runSpMVBenchmarks( Benchmark & benchmark,
		metadata );
		// Start the actual benchmark in spmv.h
		try {
		SpMVLegacy::benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, parameters, verboseMR );
		SpMVLegacy::benchmarkSpmv< Real >( benchmark, inputFileName, parameters, verboseMR );
		}
		catch( const std::exception& ex ) {
		std::cerr << ex.what() << std::endl;