Working on JSON SpMV benchmark. (0179d4a0) · Commits · TNL / tnl-dev

src/Benchmarks/Benchmarks.h

+13 −7

Original line number	Diff line number	Diff line
		@@ -34,10 +34,11 @@ namespace Benchmarks {
		const double oneGB = 1024.0 * 1024.0 * 1024.0;


		template< typename Logger = Logging >
		struct BenchmarkResult
		{
		using HeaderElements = Logging::HeaderElements;
		using RowElements = Logging::RowElements;
		using HeaderElements = typename Logger::HeaderElements;
		using RowElements = typename Logger::RowElements;

		double time = std::numeric_limits<double>::quiet_NaN();
		double stddev = std::numeric_limits<double>::quiet_NaN();
		@@ -71,6 +72,11 @@ public:
		using typename Logger::MetadataColumns;
		using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;

		using typename Logger::CommonLogs;
		using Logger::addCommonLogs;
		using Logger::addLogsMetadata;
		using Logger::writeHeader;

		Benchmark( int loops = 10,
		bool verbose = true )
		: Logger(verbose), loops(loops)
		@@ -202,7 +208,7 @@ public:
		time( ResetFunction reset,
		const String & performer,
		ComputeFunction & compute,
		BenchmarkResult & result )
		BenchmarkResult< Logger > & result )
		{
		result.time = std::numeric_limits<double>::quiet_NaN();
		result.stddev = std::numeric_limits<double>::quiet_NaN();
		@@ -247,7 +253,7 @@ public:
		const String & performer,
		ComputeFunction & compute )
		{
		BenchmarkResult result;
		BenchmarkResult< Logger > result;
		return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
		}

		@@ -259,7 +265,7 @@ public:
		double
		time( const String & performer,
		ComputeFunction & compute,
		BenchmarkResult & result )
		BenchmarkResult< Logger > & result )
		{
		result.time = std::numeric_limits<double>::quiet_NaN();
		result.stddev = std::numeric_limits<double>::quiet_NaN();
		@@ -295,7 +301,7 @@ public:
		time( const String & performer,
		ComputeFunction & compute )
		{
		BenchmarkResult result;
		BenchmarkResult< Logger > result;
		return time< Device, ComputeFunction >( performer, compute, result );
		}

		@@ -310,7 +316,7 @@ public:
		std::cerr << msg << std::endl;
		}

		using Logging::save;
		using Logger::save;

		SolverMonitorType& getMonitor() {
		return monitor;

src/Benchmarks/JsonLogging.h

+45 −2

Original line number	Diff line number	Diff line
		@@ -68,6 +68,7 @@ class JsonLoggingRowElements

		auto cend() const noexcept { return elements.cend(); }

		size_t size() const noexcept { return this->elements.size(); };
		protected:
		std::list< String > elements;

		@@ -81,8 +82,11 @@ public:
		using MetadataMap = std::map< const char*, String >;
		using MetadataColumns = std::vector<MetadataElement>;

		using CommonLogs = std::vector< std::pair< const char*, String > >;
		using LogsMetadata = std::vector< String >;

		using HeaderElements = std::vector< String >;
		using RowElements = LoggingRowElements;
		using RowElements = JsonLoggingRowElements;

		JsonLogging( int verbose = true )
		: verbose(verbose)
		@@ -94,6 +98,42 @@ public:
		this->verbose = verbose;
		}

		// Reports (name, value) pairs given in `logs`.
		// Each pair is written to the `log` stream as a quoted `"name" = "value"` line;
		// when `verbose` is set it is also echoed to standard output as `name = value`.
		// NOTE(review): `=`-separated lines are not valid JSON (which would need `:` and
		// enclosing braces) - presumably work in progress; confirm the intended format.
		void addCommonLogs( const CommonLogs& logs )
		{
		   // Bind by const reference: each pair carries a String, so copying is wasteful.
		   for( const auto& lg : logs )
		   {
		      if( verbose )
		         std::cout << lg.first << " = " << lg.second << std::endl;
		      // Close the quote opened before the value; it was left unterminated before.
		      log << "\"" << lg.first << "\" = \"" << lg.second << "\"" << std::endl;
		   }
		}

		// Removes all column names collected so far by addLogsMetadata().
		void resetLogsMetadata() { this->logsMetadata.clear(); }

		// Misspelled original name, kept so that existing callers keep compiling.
		void resetLogsMetadat() { this->resetLogsMetadata(); }

		// Appends the names in `md` to the end of the stored list of column names,
		// preserving their order.
		void addLogsMetadata( const std::vector< String >& md )
		{
		   auto& columns = this->logsMetadata;
		   columns.insert( columns.end(), md.begin(), md.end() );
		}

		// Prints the stored column names to standard output on one line,
		// each followed by a tab, and terminates the line.
		// Note that this prints regardless of the `verbose` flag.
		void writeHeader()
		{
		   // Iterate by const reference so that no String is copied just to be printed.
		   for( const auto& md : this->logsMetadata )
		      std::cout << md << "\t";
		   std::cout << std::endl;
		}

		// Writes one row of results.
		// The i-th element of `rowEls` is paired with the i-th stored column name and
		// written to the `log` stream as a quoted `"name" = "value",` line; when
		// `verbose` is set the bare value is also echoed to standard output followed by a tab.
		// The number of elements must match the number of stored column names
		// (checked by TNL_ASSERT_EQ).
		void writeRow( const RowElements& rowEls )
		{
		   TNL_ASSERT_EQ( rowEls.size(), this->logsMetadata.size(), "" );
		   auto md = this->logsMetadata.begin();
		   // Bind by const reference to avoid copying every element.
		   for( const auto& el : rowEls )
		   {
		      if( verbose )
		         std::cout << el << "\t";
		      // Close the quote opened before the value; it was left unterminated before.
		      log << " \"" << *md << "\" = \"" << el << "\"," << std::endl;
		      ++md;
		   }
		}

		void
		writeTitle( const String & title )
		{
		@@ -178,7 +218,7 @@ public:
		std::cout << std::setw( 20 ) << it.second;
		}
		// spanning element is printed as usual column to stdout
		std::cout << std::setw( 15 ) << spanningElement;
		//std::cout << std::setw( 15 ) << spanningElement;
		for( auto & it : subElements ) {
		std::cout << std::setw( 15 ) << it;
		}
		@@ -279,6 +319,9 @@ protected:
		MetadataColumns metadataColumns;
		bool header_changed = true;
		std::vector< std::pair< String, int > > horizontalGroups;

		// new JSON implementation
		LogsMetadata logsMetadata;
		};

		} // namespace Benchmarks

src/Benchmarks/Logging.h

+15 −0

Original line number	Diff line number	Diff line
		@@ -81,6 +81,8 @@ public:
		using MetadataMap = std::map< const char*, String >;
		using MetadataColumns = std::vector<MetadataElement>;

		using CommonLogs = std::vector< std::pair< const char*, String > >;

		using HeaderElements = std::vector< String >;
		using RowElements = LoggingRowElements;

		@@ -102,6 +104,19 @@ public:
		log << ": title = " << title << std::endl;
		}

		// Echoes the (name, value) pairs in `logs` to standard output as `name = value`
		// lines when `verbose` is set. Nothing is written to the `log` stream here.
		void addCommonLogs( const CommonLogs& logs )
		{
		   // The loop variable is deliberately not called `log`: that name would shadow
		   // the `log` stream member of this class. Bound by const reference to avoid copies.
		   for( const auto& entry : logs )
		   {
		      if( verbose )
		         std::cout << entry.first << " = " << entry.second << std::endl;
		   }
		}

		// Intentionally empty: this logger ignores the column names passed in `md`.
		void addLogsMetadata( const std::vector< String >& md )
		{
		}

		// Intentionally empty: this logger emits nothing here.
		void writeHeader()
		{
		}

		void
		writeMetadata( const MetadataMap & metadata )
		{

src/Benchmarks/SpMV/SpmvBenchmarkResult.h

+21 −7

Original line number	Diff line number	Diff line
		@@ -17,9 +17,10 @@ namespace Benchmarks {

		template< typename Real,
		typename Device,
		typename Index >
		typename Index,
		typename Logger = JsonLogging >
		struct SpmvBenchmarkResult
		: public BenchmarkResult
		: public BenchmarkResult< Logger >
		{
		using RealType = Real;
		using DeviceType = Device;
		@@ -27,23 +28,35 @@ struct SpmvBenchmarkResult
		using HostVector = Containers::Vector< Real, Devices::Host, Index >;
		using BenchmarkVector = Containers::Vector< Real, Device, Index >;

		SpmvBenchmarkResult( const HostVector& csrResult,
		using typename Logger::HeaderElements;
		using typename Logger::RowElements;
		using BenchmarkResult< Logger >::stddev;
		using BenchmarkResult< Logger >::bandwidth;
		using BenchmarkResult< Logger >::speedup;


		SpmvBenchmarkResult( const String& format,
		const HostVector& csrResult,
		const BenchmarkVector& benchmarkResult,
		const IndexType nonzeros )
		: csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){};
		: format( format ), csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){};

		virtual HeaderElements getTableHeader() const override
		{
		return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
		return HeaderElements( {"format", "device", "non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
		}

		// Replaces the format name that getRowElements() emits as the first row element.
		void setFormat( const String& format )
		{
		   // `this->` is required: the parameter shadows the member of the same name.
		   this->format = format;
		}

		virtual RowElements getRowElements() const override
		{
		HostVector benchmarkResultCopy;
		benchmarkResultCopy = benchmarkResult;
		auto diff = csrResult - benchmarkResultCopy;
		RowElements elements;
		elements << nonzeros << time << stddev << stddev/time << bandwidth;
		elements << format
		<< ( std::is_same< Device, Devices::Host >::value ? "CPU" : "GPU" )
		<< nonzeros << time << stddev << stddev/time << bandwidth;
		if( speedup != 0.0 )
		elements << speedup;
		else elements << "N/A";
		@@ -51,6 +64,7 @@ struct SpmvBenchmarkResult
		return elements;
		}

		String format;
		const HostVector& csrResult;
		const BenchmarkVector& benchmarkResult;
		const IndexType nonzeros;

src/Benchmarks/SpMV/spmv.h

+40 −26

Original line number	Diff line number	Diff line
		@@ -17,6 +17,7 @@
		#include <cstdint>

		#include "../Benchmarks.h"
		#include "../JsonLogging.h"
		#include "SpmvBenchmarkResult.h"

		#include <TNL/Pointers/DevicePointer.h>
		@@ -58,7 +59,9 @@ using namespace TNL::Matrices;

		namespace TNL {
		namespace Benchmarks {
		namespace SpMVLegacy {
		namespace SpMV {

		using BenchmarkType = TNL::Benchmarks::Benchmark< JsonLogging >;

		/////
		// General sparse matrix aliases
		@@ -218,7 +221,7 @@ std::string getFormatShort( const Matrix& matrix )
		}

		// Print information about the matrix.
		template< typename Matrix >
		/*template< typename Matrix >
		void printMatrixInfo( const Matrix& matrix,
		std::ostream& str )
		{
		@@ -226,13 +229,13 @@ void printMatrixInfo( const Matrix& matrix,
		str << " Rows: " << matrix.getRows() << std::endl;
		str << " Cols: " << matrix.getColumns() << std::endl;
		str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
		}
		}*/

		template< typename Real,
		template< typename, typename, typename > class Matrix,
		template< typename, typename, typename, typename > class Vector = Containers::Vector >
		void
		benchmarkSpMVLegacy( Benchmark<>& benchmark,
		benchmarkSpMVLegacy( BenchmarkType& benchmark,
		const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
		const String& inputFileName,
		bool verboseMR )
		@@ -247,12 +250,12 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark,

		SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );

		benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
		/*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( hostMatrix.getRows() ) },
		{ "columns", convertToString( hostMatrix.getColumns() ) },
		{ "matrix format", MatrixInfo< HostMatrix >::getFormat() }
		} ));
		} ));*/
		const int elements = hostMatrix.getNonzeroElementsCount();
		const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
		benchmark.setOperation( datasetSize );
		@@ -271,7 +274,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark,
		hostMatrix.vectorProduct( hostInVector, hostOutVector );

		};
		SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
		SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );

		/////
		@@ -289,7 +292,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark,
		auto spmvCuda = [&]() {
		cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
		};
		SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
		SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
		#endif
		std::cout << std::endl;
		@@ -300,7 +303,7 @@ template< typename Real,
		template< typename, typename, typename > class Matrix,
		template< typename, typename, typename, typename > class Vector = Containers::Vector >
		void
		benchmarkSpMV( Benchmark<>& benchmark,
		benchmarkSpMV( BenchmarkType& benchmark,
		const InputMatrix& inputMatrix,
		const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
		const String& inputFileName,
		@@ -322,7 +325,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
		return;
		}

		benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
		benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( hostMatrix.getRows() ) },
		{ "columns", convertToString( hostMatrix.getColumns() ) },
		@@ -346,7 +349,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
		hostMatrix.vectorProduct( hostInVector, hostOutVector );

		};
		SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
		SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );

		/////
		@@ -365,7 +368,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
		auto spmvCuda = [&]() {
		cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
		};
		SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
		SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
		#endif
		std::cout << std::endl;
		@@ -374,7 +377,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
		template< typename Real = double,
		typename Index = int >
		void
		benchmarkSpmv( Benchmark<>& benchmark,
		benchmarkSpmv( BenchmarkType& benchmark,
		const String& inputFileName,
		const Config::ParameterContainer& parameters,
		bool verboseMR )
		@@ -417,12 +420,17 @@ benchmarkSpmv( Benchmark<>& benchmark,
		////
		// Perform benchmark on host with CSR as a reference CPU format
		//
		benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
		benchmark.addCommonLogs( BenchmarkType::CommonLogs( {
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) } } ) );

		/*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "CSR" ) }
		} ));
		} ));*/

		HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );

		@@ -435,19 +443,21 @@ benchmarkSpmv( Benchmark<>& benchmark,
		csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
		};

		SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults );
		SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
		benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader() );
		benchmark.writeHeader();
		benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults );

		#ifdef HAVE_CUDA
		////
		// Perform benchmark on CUDA device with cuSparse as a reference GPU format
		//
		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		/*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "cuSparse" ) }
		} ));
		} ));*/

		cusparseHandle_t cusparseHandle;
		cusparseCreate( &cusparseHandle );
		@@ -469,19 +479,20 @@ benchmarkSpmv( Benchmark<>& benchmark,
		cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector );
		};

		SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
		// With Device = Devices::Cuda the constructor expects the benchmarked result as a
		// CUDA vector, so pass cudaOutVector (the vector the cuSparse product is written to)
		// instead of the host vector. The format name typo "cusprase" is fixed as well.
		SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( String( "cuSparse" ), hostOutVector, cudaOutVector, csrHostMatrix.getNonzeroElementsCount() );
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );

		#ifdef HAVE_CSR5
		////
		// Perform benchmark on CUDA device with CSR5 as a reference GPU format
		//
		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		cudaBenchmarkResults.setFormat( String( "CSR5" ) );
		/*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "CSR5" ) }
		} ));
		} ));*/

		CudaVector cudaOutVector2( cudaOutVector );
		CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector );
		@@ -489,6 +500,7 @@ benchmarkSpmv( Benchmark<>& benchmark,
		auto csr5SpMV = [&]() {
		csr5Benchmark.vectorProduct();
		};

		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults );
		std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
		csrCudaMatrix.reset();
		@@ -497,12 +509,13 @@ benchmarkSpmv( Benchmark<>& benchmark,
		////
		// Perform benchmark on CUDA device with LightSpMV as a reference GPU format
		//
		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		cudaBenchmarkResults.setFormat( String( "LightSpMV Vector" ) );
		/*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "LightSpMV Vector" ) }
		} ));
		} ));*/

		LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix;
		lightSpMVCSRHostMatrix = csrHostMatrix;
		@@ -516,12 +529,13 @@ benchmarkSpmv( Benchmark<>& benchmark,
		};
		benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );

		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		cudaBenchmarkResults.setFormat( String( "LightSpMV Warp" ) );
		/*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "matrix name", convertToString( inputFileName ) },
		{ "rows", convertToString( csrHostMatrix.getRows() ) },
		{ "columns", convertToString( csrHostMatrix.getColumns() ) },
		{ "matrix format", String( "LightSpMV Warp" ) }
		} ));
		} ));*/
		lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp );
		benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
		#endif