Loading src/Benchmarks/SpMV/SpmvBenchmarkResult.h 0 → 100644 +52 −0 Original line number Diff line number Diff line /*************************************************************************** SpmvBenchmarkResult.h - description ------------------- begin : Mar 5, 2020 copyright : (C) 2020 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include "../Benchmarks.h" namespace TNL { namespace Benchmarks { template< typename Real = double, typename Index = int > struct SpmvBenchmarkResult : public BenchmarkResult { using RealType = Real; using IndexType = Index; using HostVector = Containers::Vector< Real, Devices::Host, Index >; using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >; SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult ) : hostResult( hostResult ), cudaResult( cudaResult), cusparseResult( cusparseResult ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements({"time", "stddev", "stddev/time", "speedup", "Host.Diff.Max", "Host.Diff.L2", "Cusparse.Diff.Max", "Cusparse.Diff.L2"}); } virtual RowElements getRowElements() const override { HostVector cudaCopy, cusparseCopy, a, b; cudaCopy = cudaResult; cusparseCopy = cusparseResult; a = cudaCopy - hostResult; b = cudaCopy - cusparseCopy; return RowElements({ time, stddev, stddev/time, speedup, max( abs( a ) ), lpNorm( a, 2.0 ), max( abs( b ) ), lpNorm( b, 2.0 ) }); } HostVector &hostResult; CudaVector &cudaResult, &cusparseResult; }; } //namespace Benchmarks } //namespace TNL src/Benchmarks/SpMV/spmv.h +102 −132 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #pragma once #include "../Benchmarks.h" #include "SpmvBenchmarkResult.h" #include <TNL/Pointers/DevicePointer.h> #include <TNL/Matrices/Legacy/CSR.h> Loading Loading @@ -114,8 +115,8 @@ benchmarkSpMV( 
Benchmark& benchmark, bool verboseMR ) { // Setup CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function typedef Matrices::Legacy::CSR< Real, Devices::Host, int > CSR_HostMatrix; typedef Matrices::Legacy::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix; using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >; using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >; CSR_HostMatrix CSRhostMatrix; CSR_DeviceMatrix CSRdeviceMatrix; Loading Loading @@ -148,7 +149,7 @@ benchmarkSpMV( Benchmark& benchmark, HostMatrix hostMatrix; DeviceMatrix deviceMatrix; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; CudaVector deviceVector, deviceVector2, cusparseVector; // Load the format MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); Loading @@ -171,20 +172,26 @@ benchmarkSpMV( Benchmark& benchmark, deviceMatrix = hostMatrix; deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); cusparseVector.setSize( hostMatrix.getRows() ); #endif // reset function auto reset = [&]() { hostVector.setValue( 1.0 ); hostVector2.setValue( 0.0 ); auto resetHostVectors = [&]() { hostVector = 1.0; hostVector2 = 0.0; }; #ifdef HAVE_CUDA deviceVector.setValue( 1.0 ); deviceVector2.setValue( 0.0 ); #endif auto resetCudaVectors = [&]() { deviceVector = 1.0; deviceVector2 = 0.0; }; auto resetCusparseVectors = [&]() { deviceVector = 1.0; cusparseVector = 0.0; }; #endif const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; // compute functions Loading @@ -197,12 +204,12 @@ benchmarkSpMV( Benchmark& benchmark, }; auto spmvCusparse = [&]() { cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); cusparseCSR.vectorProduct( deviceVector, cusparseVector ); }; #endif benchmark.setOperation( datasetSize ); benchmark.time< 
Devices::Host >( reset, "CPU", spmvHost ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost ); // Initialize the host vector to be compared. // (The values in hostVector2 will be reset when spmvCuda starts) Loading @@ -214,7 +221,7 @@ benchmarkSpMV( Benchmark& benchmark, resultHostVector2 = hostVector2; #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) Loading @@ -235,46 +242,9 @@ benchmarkSpMV( Benchmark& benchmark, { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) } } )); benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); HostVector resultcuSPARSEDeviceVector2; resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() ); resultcuSPARSEDeviceVector2.setValue( 0.0 ); resultcuSPARSEDeviceVector2 = deviceVector2; // Difference between GPU (current format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 ); std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax ); std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm ); char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; // Difference between CPU and GPU results for the current format //Real differenceAbsMax = 
resultHostVector2.differenceAbsMax( resultDeviceVector2 ); Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) ); //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 ); Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 ); std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax ); std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm ); char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ]; char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ]; // Print result differences of CPU and GPU of current format std::cout << CPUxGPU_absMax << std::endl; std::cout << CPUxGPU_lpNorm << std::endl; SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult ); // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif std::cout << std::endl; Loading Loading
src/Benchmarks/SpMV/SpmvBenchmarkResult.h 0 → 100644 +52 −0 Original line number Diff line number Diff line /*************************************************************************** SpmvBenchmarkResult.h - description ------------------- begin : Mar 5, 2020 copyright : (C) 2020 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include "../Benchmarks.h" namespace TNL { namespace Benchmarks { template< typename Real = double, typename Index = int > struct SpmvBenchmarkResult : public BenchmarkResult { using RealType = Real; using IndexType = Index; using HostVector = Containers::Vector< Real, Devices::Host, Index >; using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >; SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult ) : hostResult( hostResult ), cudaResult( cudaResult), cusparseResult( cusparseResult ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements({"time", "stddev", "stddev/time", "speedup", "Host.Diff.Max", "Host.Diff.L2", "Cusparse.Diff.Max", "Cusparse.Diff.L2"}); } virtual RowElements getRowElements() const override { HostVector cudaCopy, cusparseCopy, a, b; cudaCopy = cudaResult; cusparseCopy = cusparseResult; a = cudaCopy - hostResult; b = cudaCopy - cusparseCopy; return RowElements({ time, stddev, stddev/time, speedup, max( abs( a ) ), lpNorm( a, 2.0 ), max( abs( b ) ), lpNorm( b, 2.0 ) }); } HostVector &hostResult; CudaVector &cudaResult, &cusparseResult; }; } //namespace Benchmarks } //namespace TNL
src/Benchmarks/SpMV/spmv.h +102 −132 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #pragma once #include "../Benchmarks.h" #include "SpmvBenchmarkResult.h" #include <TNL/Pointers/DevicePointer.h> #include <TNL/Matrices/Legacy/CSR.h> Loading Loading @@ -114,8 +115,8 @@ benchmarkSpMV( Benchmark& benchmark, bool verboseMR ) { // Setup CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function typedef Matrices::Legacy::CSR< Real, Devices::Host, int > CSR_HostMatrix; typedef Matrices::Legacy::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix; using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >; using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >; CSR_HostMatrix CSRhostMatrix; CSR_DeviceMatrix CSRdeviceMatrix; Loading Loading @@ -148,7 +149,7 @@ benchmarkSpMV( Benchmark& benchmark, HostMatrix hostMatrix; DeviceMatrix deviceMatrix; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; CudaVector deviceVector, deviceVector2, cusparseVector; // Load the format MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); Loading @@ -171,20 +172,26 @@ benchmarkSpMV( Benchmark& benchmark, deviceMatrix = hostMatrix; deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); cusparseVector.setSize( hostMatrix.getRows() ); #endif // reset function auto reset = [&]() { hostVector.setValue( 1.0 ); hostVector2.setValue( 0.0 ); auto resetHostVectors = [&]() { hostVector = 1.0; hostVector2 = 0.0; }; #ifdef HAVE_CUDA deviceVector.setValue( 1.0 ); deviceVector2.setValue( 0.0 ); #endif auto resetCudaVectors = [&]() { deviceVector = 1.0; deviceVector2 = 0.0; }; auto resetCusparseVectors = [&]() { deviceVector = 1.0; cusparseVector = 0.0; }; #endif const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / 
oneGB; // compute functions Loading @@ -197,12 +204,12 @@ benchmarkSpMV( Benchmark& benchmark, }; auto spmvCusparse = [&]() { cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); cusparseCSR.vectorProduct( deviceVector, cusparseVector ); }; #endif benchmark.setOperation( datasetSize ); benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost ); // Initialize the host vector to be compared. // (The values in hostVector2 will be reset when spmvCuda starts) Loading @@ -214,7 +221,7 @@ benchmarkSpMV( Benchmark& benchmark, resultHostVector2 = hostVector2; #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) Loading @@ -235,46 +242,9 @@ benchmarkSpMV( Benchmark& benchmark, { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) } } )); benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); HostVector resultcuSPARSEDeviceVector2; resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() ); resultcuSPARSEDeviceVector2.setValue( 0.0 ); resultcuSPARSEDeviceVector2 = deviceVector2; // Difference between GPU (current format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 ); std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax ); std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE 
differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm ); char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; // Difference between CPU and GPU results for the current format //Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 ); Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) ); //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 ); Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 ); std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax ); std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm ); char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ]; char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ]; // Print result differences of CPU and GPU of current format std::cout << CPUxGPU_absMax << std::endl; std::cout << CPUxGPU_lpNorm << std::endl; SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult ); // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif std::cout << std::endl; Loading