Programming SPMV benchmark. (0c6f26e5) · Commits · TNL / tnl-dev

tests/benchmarks/tnl-benchmark-spmv.h

+105 −50

Original line number	Diff line number	Diff line
		@@ -130,7 +130,7 @@ double computeThroughput( const long int nonzeroElements,

		template< typename Matrix,
		typename Vector >
		void benchmarkHostMatrix( const Matrix& matrix,
		void benchmarkMatrix( const Matrix& matrix,
		const Vector& x,
		Vector& b,
		const long int nonzeroElements,
		@@ -160,6 +160,12 @@ void benchmarkHostMatrix( const Matrix& matrix,
		logFile << " " << throughput << endl;
		}

		/***
		 * Records a failed test in the benchmark log by writing the
		 * placeholder "N/A" on two consecutive lines, each one terminated
		 * (and flushed) with endl.
		 *
		 * It is called when a matrix could not be copied to the GPU, so that
		 * the log still receives an entry for the skipped benchmark.
		 * NOTE(review): the two lines presumably stand in for the two values
		 * a successful benchmark run logs - confirm against benchmarkMatrix.
		 *
		 * logFile - the stream of the log file the placeholders are written to.
		 */
		void writeTestFailed( fstream& logFile )
		{
		   // One placeholder line per iteration; the text is kept exactly as "N/A".
		   for( int line = 0; line < 2; line++ )
		      logFile << "N/A" << endl;
		}

		template< typename Real >
		bool setupBenchmark( const tnlParameterContainer& parameters )
		{
		@@ -213,7 +219,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		rowLengthsCuda.setSize( csrMatrix.getRows() );
		rowLengthsCuda = rowLengthsHost;
		#endif
		benchmarkHostMatrix( csrMatrix,
		benchmarkMatrix( csrMatrix,
		hostX,
		hostB,
		nonzeroElements,
		@@ -226,7 +232,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType;
		EllpackMatrixType ellpackMatrix;
		ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost );
		benchmarkHostMatrix( ellpackMatrix,
		benchmarkMatrix( ellpackMatrix,
		hostX,
		hostB,
		nonzeroElements,
		@@ -241,10 +247,12 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		if( ! cudaEllpackMatrix.copyFrom( ellpackMatrix, rowLengthsCuda ) )
		{
		cerr << "I am not able to transfer the matrix on GPU." << endl;
		return false;
		writeTestFailed( logFile );
		}
		else
		{
		cout << " done." << endl;
		benchmarkHostMatrix( cudaEllpackMatrix,
		benchmarkMatrix( cudaEllpackMatrix,
		cudaX,
		cudaB,
		nonzeroElements,
		@@ -252,6 +260,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		stopTime,
		verbose,
		logFile );
		}
		cudaEllpackMatrix.reset();
		#endif
		ellpackMatrix.reset();
		@@ -259,7 +268,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		typedef tnlSlicedEllpackMatrix< Real, tnlHost, int > SlicedEllpackMatrixType;
		SlicedEllpackMatrixType slicedEllpackMatrix;
		slicedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost );
		benchmarkHostMatrix( slicedEllpackMatrix,
		benchmarkMatrix( slicedEllpackMatrix,
		hostX,
		hostB,
		nonzeroElements,
		@@ -267,12 +276,35 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		stopTime,
		verbose,
		logFile );
		#ifdef HAVE_CUDA
		typedef tnlSlicedEllpackMatrix< Real, tnlCuda, int > SlicedEllpackMatrixCudaType;
		SlicedEllpackMatrixCudaType cudaSlicedEllpackMatrix;
		cout << "Copying matrix to GPU... ";
		if( ! cudaSlicedEllpackMatrix.copyFrom( slicedEllpackMatrix, rowLengthsCuda ) )
		{
		cerr << "I am not able to transfer the matrix on GPU." << endl;
		writeTestFailed( logFile );
		}
		else
		{
		cout << " done." << endl;
		benchmarkMatrix( cudaSlicedEllpackMatrix,
		cudaX,
		cudaB,
		nonzeroElements,
		"SlicedEllpack Cuda",
		stopTime,
		verbose,
		logFile );
		}
		cudaSlicedEllpackMatrix.reset();
		#endif
		slicedEllpackMatrix.reset();

		typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType;
		ChunkedEllpackMatrixType chunkedEllpackMatrix;
		chunkedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost );
		benchmarkHostMatrix( chunkedEllpackMatrix,
		benchmarkMatrix( chunkedEllpackMatrix,
		hostX,
		hostB,
		nonzeroElements,
		@@ -280,6 +312,29 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
		stopTime,
		verbose,
		logFile );
		#ifdef HAVE_CUDA
		typedef tnlChunkedEllpackMatrix< Real, tnlCuda, int > ChunkedEllpackMatrixCudaType;
		ChunkedEllpackMatrixCudaType cudaChunkedEllpackMatrix;
		cout << "Copying matrix to GPU... ";
		if( ! cudaChunkedEllpackMatrix.copyFrom( chunkedEllpackMatrix, rowLengthsCuda ) )
		{
		cerr << "I am not able to transfer the matrix on GPU." << endl;
		writeTestFailed( logFile );
		}
		else
		{
		cout << " done." << endl;
		benchmarkMatrix( cudaChunkedEllpackMatrix,
		cudaX,
		cudaB,
		nonzeroElements,
		"ChunkedEllpack Cuda",
		stopTime,
		verbose,
		logFile );
		}
		cudaChunkedEllpackMatrix.reset();
		#endif
		chunkedEllpackMatrix.reset();
		}
		}