Commit a147f47a authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Improving SpMV benchmark with benchmark results.

parent ab65956e
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
/***************************************************************************
                          SpmvBenchmarkResult.h  -  description
                             -------------------
    begin                : Mar 5, 2020
    copyright            : (C) 2020 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include "../Benchmarks.h"

namespace TNL {
namespace Benchmarks {

/**
 * Benchmark result for SpMV which, besides the timing columns, reports how
 * much the CUDA result differs from the host result and from the cuSPARSE
 * result (maximum absolute difference and l2 norm of the difference).
 *
 * The three result vectors are held by reference and are only read when
 * getRowElements() is called, so they must outlive this object.
 * NOTE(review): `time`, `stddev` and `speedup` are not declared here; they are
 * presumably members of the BenchmarkResult base class - confirm.
 */
template< typename Real = double,
          typename Index = int >
struct SpmvBenchmarkResult
: public BenchmarkResult
{
   using RealType = Real;
   using IndexType = Index;
   using HostVector = Containers::Vector< Real, Devices::Host, Index >;
   using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >;

   // Note the argument order: CUDA result first, then host result, then cuSPARSE result.
   SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult )
   : hostResult( hostResult ),
     cudaResult( cudaResult ),
     cusparseResult( cusparseResult )
   {}

   // Column names; the order matches the values produced by getRowElements().
   virtual HeaderElements getTableHeader() const override
   {
      return HeaderElements({
         "time", "stddev", "stddev/time", "speedup",
         "Host.Diff.Max", "Host.Diff.L2",
         "Cusparse.Diff.Max", "Cusparse.Diff.L2"
      });
   }

   // Builds one table row: timing values followed by the result differences.
   virtual RowElements getRowElements() const override
   {
      // Bring both device results into host vectors so that they can be compared with the host result.
      HostVector cudaOnHost;
      cudaOnHost = cudaResult;
      HostVector cusparseOnHost;
      cusparseOnHost = cusparseResult;

      // Differences of the CUDA result against the host and the cuSPARSE results.
      HostVector hostDiff;
      hostDiff = cudaOnHost - hostResult;
      HostVector cusparseDiff;
      cusparseDiff = cudaOnHost - cusparseOnHost;

      return RowElements({
         time, stddev, stddev/time, speedup,
         max( abs( hostDiff ) ), lpNorm( hostDiff, 2.0 ),
         max( abs( cusparseDiff ) ), lpNorm( cusparseDiff, 2.0 )
      });
   }

   HostVector &hostResult;

   CudaVector &cudaResult;
   CudaVector &cusparseResult;
};
   
} //namespace Benchmarks
} //namespace TNL
+102 −132
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#pragma once

#include "../Benchmarks.h"
#include "SpmvBenchmarkResult.h"

#include <TNL/Pointers/DevicePointer.h>
#include <TNL/Matrices/Legacy/CSR.h>
@@ -114,8 +115,8 @@ benchmarkSpMV( Benchmark& benchmark,
               bool verboseMR )
{
   // Set up CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function
    typedef Matrices::Legacy::CSR< Real, Devices::Host, int > CSR_HostMatrix;
    typedef Matrices::Legacy::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix;
   using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >;
   using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >;

   CSR_HostMatrix CSRhostMatrix;
   CSR_DeviceMatrix CSRdeviceMatrix;
@@ -148,7 +149,7 @@ benchmarkSpMV( Benchmark& benchmark,
   HostMatrix hostMatrix;
   DeviceMatrix deviceMatrix;
   HostVector hostVector, hostVector2;
    CudaVector deviceVector, deviceVector2;
   CudaVector deviceVector, deviceVector2, cusparseVector;

   // Load the format
   MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
@@ -171,20 +172,26 @@ benchmarkSpMV( Benchmark& benchmark,
   deviceMatrix = hostMatrix;
   deviceVector.setSize( hostMatrix.getColumns() );
   deviceVector2.setSize( hostMatrix.getRows() );
   cusparseVector.setSize( hostMatrix.getRows() );
#endif

   // reset function
    auto reset = [&]() {
       hostVector.setValue( 1.0 );
       hostVector2.setValue( 0.0 );
   auto resetHostVectors = [&]() {
      hostVector = 1.0;
      hostVector2 = 0.0;
   };
#ifdef HAVE_CUDA
       deviceVector.setValue( 1.0 );
       deviceVector2.setValue( 0.0 );
 #endif
   auto resetCudaVectors = [&]() {
      deviceVector = 1.0;
      deviceVector2 = 0.0;
   };
   // Reset before each cuSPARSE run: input vector to ones, cuSPARSE output vector to zeros.
   auto resetCusparseVectors = [&]() {
      deviceVector = 1.0;
      // Must be an assignment; `==` only compared the vector with 0.0 and
      // discarded the result, so the output vector was never reset.
      cusparseVector = 0.0;
   };
 #endif

   const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();

   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;

    // compute functions
@@ -197,12 +204,12 @@ benchmarkSpMV( Benchmark& benchmark,
   };

   auto spmvCusparse = [&]() {
        cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
       cusparseCSR.vectorProduct( deviceVector, cusparseVector );
   };
#endif

   benchmark.setOperation( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost );

   // Initialize the host vector to be compared.
   //  (The values in hostVector2 will be reset when spmvCuda starts)
@@ -214,7 +221,7 @@ benchmarkSpMV( Benchmark& benchmark,
   resultHostVector2 = hostVector2;

#ifdef HAVE_CUDA
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda );

   // Initialize the device vector to be compared.
   //  (The values in deviceVector2 will be reset when spmvCusparse starts)
@@ -235,46 +242,9 @@ benchmarkSpMV( Benchmark& benchmark,
         { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) }
      } ));

    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse );

    HostVector resultcuSPARSEDeviceVector2;
    resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() );
    resultcuSPARSEDeviceVector2.setValue( 0.0 );

    resultcuSPARSEDeviceVector2 = deviceVector2;

    // Difference between GPU (current format) and GPU-cuSPARSE results
    //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
    Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) );
    //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 );
    Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 );

    std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax );
    std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm );

    char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ];
    char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ];


    // Difference between CPU and GPU results for the current format
    //Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 );
    Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) );
    //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 );
    Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 );

    std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax );
    std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm );

    char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ];
    char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ];

    // Print result differences of CPU and GPU of current format
    std::cout << CPUxGPU_absMax << std::endl;
    std::cout << CPUxGPU_lpNorm << std::endl;
   SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult );

    // Print result differences of GPU of current format and GPU with cuSPARSE.
    std::cout << GPUcuSparse_absMax << std::endl;
    std::cout << GPUcuSparse_lpNorm << std::endl;
 #endif

    std::cout << std::endl;