Loading src/Benchmarks/SpMV/SpmvBenchmarkResult.h 0 → 100644 +52 −0 Original line number Diff line number Diff line /*************************************************************************** SpmvBenchmarkResult.h - description ------------------- begin : Mar 5, 2020 copyright : (C) 2020 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include "../Benchmarks.h" namespace TNL { namespace Benchmarks { template< typename Real = double, typename Index = int > struct SpmvBenchmarkResult : public BenchmarkResult { using RealType = Real; using IndexType = Index; using HostVector = Containers::Vector< Real, Devices::Host, Index >; using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >; SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult ) : hostResult( hostResult ), cudaResult( cudaResult), cusparseResult( cusparseResult ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements({"time", "stddev", "stddev/time", "speedup", "Host.Diff.Max", "Host.Diff.L2", "Cusparse.Diff.Max", "Cusparse.Diff.L2"}); } virtual RowElements getRowElements() const override { HostVector cudaCopy, cusparseCopy, a, b; cudaCopy = cudaResult; cusparseCopy = cusparseResult; a = cudaCopy - hostResult; b = cudaCopy - cusparseCopy; return RowElements({ time, stddev, stddev/time, speedup, max( abs( a ) ), lpNorm( a, 2.0 ), max( abs( b ) ), lpNorm( b, 2.0 ) }); } HostVector &hostResult; CudaVector &cudaResult, &cusparseResult; }; } //namespace Benchmarks } //namespace TNL src/Benchmarks/SpMV/spmv.h +102 −132 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #pragma once #include "../Benchmarks.h" #include "SpmvBenchmarkResult.h" #include <TNL/Pointers/DevicePointer.h> #include <TNL/Matrices/Legacy/CSR.h> Loading Loading @@ -114,8 +115,8 @@ benchmarkSpMV( 
Benchmark& benchmark, bool verboseMR ) { // Setup CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function typedef Matrices::Legacy::CSR< Real, Devices::Host, int > CSR_HostMatrix; typedef Matrices::Legacy::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix; using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >; using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >; CSR_HostMatrix CSRhostMatrix; CSR_DeviceMatrix CSRdeviceMatrix; Loading Loading @@ -148,7 +149,7 @@ benchmarkSpMV( Benchmark& benchmark, HostMatrix hostMatrix; DeviceMatrix deviceMatrix; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; CudaVector deviceVector, deviceVector2, cusparseVector; // Load the format MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); Loading @@ -171,20 +172,26 @@ benchmarkSpMV( Benchmark& benchmark, deviceMatrix = hostMatrix; deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); cusparseVector.setSize( hostMatrix.getRows() ); #endif // reset function auto reset = [&]() { hostVector.setValue( 1.0 ); hostVector2.setValue( 0.0 ); auto resetHostVectors = [&]() { hostVector = 1.0; hostVector2 = 0.0; }; #ifdef HAVE_CUDA deviceVector.setValue( 1.0 ); deviceVector2.setValue( 0.0 ); #endif auto resetCudaVectors = [&]() { deviceVector = 1.0; deviceVector2 = 0.0; }; auto resetCusparseVectors = [&]() { deviceVector = 1.0; cusparseVector = 0.0; }; #endif const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; // compute functions Loading @@ -197,12 +204,12 @@ benchmarkSpMV( Benchmark& benchmark, }; auto spmvCusparse = [&]() { cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); cusparseCSR.vectorProduct( deviceVector, cusparseVector ); }; #endif benchmark.setOperation( datasetSize ); benchmark.time< 
Devices::Host >( reset, "CPU", spmvHost ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost ); // Initialize the host vector to be compared. // (The values in hostVector2 will be reset when spmvCuda starts) Loading @@ -214,7 +221,7 @@ benchmarkSpMV( Benchmark& benchmark, resultHostVector2 = hostVector2; #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) Loading @@ -235,46 +242,9 @@ benchmarkSpMV( Benchmark& benchmark, { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) } } )); benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); HostVector resultcuSPARSEDeviceVector2; resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() ); resultcuSPARSEDeviceVector2.setValue( 0.0 ); resultcuSPARSEDeviceVector2 = deviceVector2; // Difference between GPU (current format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 ); std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax ); std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm ); char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; // Difference between CPU and GPU results for the current format //Real differenceAbsMax = 
resultHostVector2.differenceAbsMax( resultDeviceVector2 ); Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) ); //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 ); Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 ); std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax ); std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm ); char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ]; char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ]; // Print result differences of CPU and GPU of current format std::cout << CPUxGPU_absMax << std::endl; std::cout << CPUxGPU_lpNorm << std::endl; SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult ); // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif std::cout << std::endl; Loading Loading
src/Benchmarks/SpMV/SpmvBenchmarkResult.h 0 → 100644 +52 −0 Original line number Diff line number Diff line /*************************************************************************** SpmvBenchmarkResult.h - description ------------------- begin : Mar 5, 2020 copyright : (C) 2020 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include "../Benchmarks.h" namespace TNL { namespace Benchmarks { template< typename Real = double, typename Index = int > struct SpmvBenchmarkResult : public BenchmarkResult { using RealType = Real; using IndexType = Index; using HostVector = Containers::Vector< Real, Devices::Host, Index >; using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >; SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult ) : hostResult( hostResult ), cudaResult( cudaResult), cusparseResult( cusparseResult ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements({"time", "stddev", "stddev/time", "speedup", "Host.Diff.Max", "Host.Diff.L2", "Cusparse.Diff.Max", "Cusparse.Diff.L2"}); } virtual RowElements getRowElements() const override { HostVector cudaCopy, cusparseCopy, a, b; cudaCopy = cudaResult; cusparseCopy = cusparseResult; a = cudaCopy - hostResult; b = cudaCopy - cusparseCopy; return RowElements({ time, stddev, stddev/time, speedup, max( abs( a ) ), lpNorm( a, 2.0 ), max( abs( b ) ), lpNorm( b, 2.0 ) }); } HostVector &hostResult; CudaVector &cudaResult, &cusparseResult; }; } //namespace Benchmarks } //namespace TNL
src/Benchmarks/SpMV/spmv.h +102 −132 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #pragma once #include "../Benchmarks.h" #include "SpmvBenchmarkResult.h" #include <TNL/Pointers/DevicePointer.h> #include <TNL/Matrices/Legacy/CSR.h> Loading Loading @@ -114,8 +115,8 @@ benchmarkSpMV( Benchmark& benchmark, bool verboseMR ) { // Setup CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function typedef Matrices::Legacy::CSR< Real, Devices::Host, int > CSR_HostMatrix; typedef Matrices::Legacy::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix; using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >; using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >; CSR_HostMatrix CSRhostMatrix; CSR_DeviceMatrix CSRdeviceMatrix; Loading Loading @@ -148,7 +149,7 @@ benchmarkSpMV( Benchmark& benchmark, HostMatrix hostMatrix; DeviceMatrix deviceMatrix; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; CudaVector deviceVector, deviceVector2, cusparseVector; // Load the format MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); Loading @@ -171,20 +172,26 @@ benchmarkSpMV( Benchmark& benchmark, deviceMatrix = hostMatrix; deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); cusparseVector.setSize( hostMatrix.getRows() ); #endif // reset function auto reset = [&]() { hostVector.setValue( 1.0 ); hostVector2.setValue( 0.0 ); auto resetHostVectors = [&]() { hostVector = 1.0; hostVector2 = 0.0; }; #ifdef HAVE_CUDA deviceVector.setValue( 1.0 ); deviceVector2.setValue( 0.0 ); #endif auto resetCudaVectors = [&]() { deviceVector = 1.0; deviceVector2 = 0.0; }; auto resetCusparseVectors = [&]() { deviceVector = 1.0; cusparseVector = 0.0; }; #endif const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / 
oneGB; // compute functions Loading @@ -197,12 +204,12 @@ benchmarkSpMV( Benchmark& benchmark, }; auto spmvCusparse = [&]() { cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); cusparseCSR.vectorProduct( deviceVector, cusparseVector ); }; #endif benchmark.setOperation( datasetSize ); benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost ); // Initialize the host vector to be compared. // (The values in hostVector2 will be reset when spmvCuda starts) Loading @@ -214,7 +221,7 @@ benchmarkSpMV( Benchmark& benchmark, resultHostVector2 = hostVector2; #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) Loading @@ -235,46 +242,9 @@ benchmarkSpMV( Benchmark& benchmark, { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) } } )); benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); HostVector resultcuSPARSEDeviceVector2; resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() ); resultcuSPARSEDeviceVector2.setValue( 0.0 ); resultcuSPARSEDeviceVector2 = deviceVector2; // Difference between GPU (current format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 ); std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax ); std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE 
differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm ); char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; // Difference between CPU and GPU results for the current format //Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 ); Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) ); //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 ); Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 ); std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax ); std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm ); char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ]; char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ]; // Print result differences of CPU and GPU of current format std::cout << CPUxGPU_absMax << std::endl; std::cout << CPUxGPU_lpNorm << std::endl; SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult ); // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif std::cout << std::endl; Loading