Commit 0179d4a0 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Jakub Klinkovský
Browse files

Working on JSON SpMV benchmark.

parent 4273faac
Loading
Loading
Loading
Loading
+13 −7
Original line number Diff line number Diff line
@@ -34,10 +34,11 @@ namespace Benchmarks {
const double oneGB = 1024.0 * 1024.0 * 1024.0;


template< typename Logger = Logging >
struct BenchmarkResult
{
   using HeaderElements = Logging::HeaderElements;
   using RowElements = Logging::RowElements;
   using HeaderElements = typename Logger::HeaderElements;
   using RowElements = typename Logger::RowElements;

   double time = std::numeric_limits<double>::quiet_NaN();
   double stddev = std::numeric_limits<double>::quiet_NaN();
@@ -71,6 +72,11 @@ public:
   using typename Logger::MetadataColumns;
   using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;

   using typename Logger::CommonLogs;
   using Logger::addCommonLogs;
   using Logger::addLogsMetadata;
   using Logger::writeHeader;

   Benchmark( int loops = 10,
              bool verbose = true )
   : Logger(verbose), loops(loops)
@@ -202,7 +208,7 @@ public:
   time( ResetFunction reset,
         const String & performer,
         ComputeFunction & compute,
         BenchmarkResult & result )
         BenchmarkResult< Logger > & result )
   {
      result.time = std::numeric_limits<double>::quiet_NaN();
      result.stddev = std::numeric_limits<double>::quiet_NaN();
@@ -247,7 +253,7 @@ public:
         const String & performer,
         ComputeFunction & compute )
   {
      BenchmarkResult result;
      BenchmarkResult< Logger > result;
      return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
   }

@@ -259,7 +265,7 @@ public:
   double
   time( const String & performer,
         ComputeFunction & compute,
         BenchmarkResult & result )
         BenchmarkResult< Logger > & result )
   {
      result.time = std::numeric_limits<double>::quiet_NaN();
      result.stddev = std::numeric_limits<double>::quiet_NaN();
@@ -295,7 +301,7 @@ public:
   time( const String & performer,
         ComputeFunction & compute )
   {
      BenchmarkResult result;
      BenchmarkResult< Logger > result;
      return time< Device, ComputeFunction >( performer, compute, result );
   }

@@ -310,7 +316,7 @@ public:
      std::cerr << msg << std::endl;
   }

   using Logging::save;
   using Logger::save;

   SolverMonitorType& getMonitor() {
      return monitor;
+45 −2
Original line number Diff line number Diff line
@@ -68,6 +68,7 @@ class JsonLoggingRowElements

      auto cend() const noexcept { return elements.cend(); }

      size_t size() const noexcept { return this->elements.size(); };
   protected:
      std::list< String > elements;

@@ -81,8 +82,11 @@ public:
   using MetadataMap = std::map< const char*, String >;
   using MetadataColumns = std::vector<MetadataElement>;

   using CommonLogs = std::vector< std::pair< const char*, String > >;
   using LogsMetadata = std::vector< String >;

   using HeaderElements = std::vector< String >;
   using RowElements = LoggingRowElements;
   using RowElements = JsonLoggingRowElements;

   JsonLogging( int verbose = true )
   : verbose(verbose)
@@ -94,6 +98,42 @@ public:
      this->verbose = verbose;
   }

   // Records key/value pairs shared by all rows of the benchmark.
   //
   // Every pair is echoed to stdout as `key = value` when verbose output is
   // enabled, and is always appended to the log stream as `"key" = "value"`
   // on a line of its own.
   //
   // logs - list of (name, value) pairs to record.
   void addCommonLogs( const CommonLogs& logs )
   {
      // Bind by const reference so the pair (and its String) is not copied.
      for( const auto& lg : logs )
      {
         if( verbose )
            std::cout << lg.first << " = " << lg.second << std::endl;
         // The value must be closed with a quote; previously the entry was
         // written as `"key" = "value` with an unbalanced quote.
         log << "\"" << lg.first << "\" = \"" << lg.second << "\"" << std::endl;
      }
   }

   void resetLogsMetadat() { this->logsMetadata.clear(); };

   void addLogsMetadata( const std::vector< String >& md )
   {
      this->logsMetadata.insert( this->logsMetadata.end(), md.begin(), md.end() );
   }

   // Prints the stored column names to stdout, each followed by a tab, and
   // terminates the line. The output is produced regardless of the verbose
   // setting and nothing is written to the log stream.
   void writeHeader()
   {
      // Bind by const reference so each String is not copied per iteration.
      for( const auto& md : this->logsMetadata )
         std::cout << md << "\t";
      std::cout << std::endl;
   }

   // Writes one row of results.
   //
   // The elements of rowEls are paired, in order, with the column names
   // stored in logsMetadata. Each element is echoed to stdout followed by a
   // tab when verbose output is enabled, and is always appended to the log
   // stream as `    "name" = "value",` on a line of its own.
   //
   // rowEls - values of the row; TNL_ASSERT_EQ checks that their count equals
   //          the number of stored column names.
   void writeRow( const RowElements& rowEls )
   {
      TNL_ASSERT_EQ( rowEls.size(), this->logsMetadata.size(), "" );
      auto md = this->logsMetadata.begin();
      // Bind by const reference so each element is not copied per iteration.
      for( const auto& el : rowEls )
      {
         if( verbose )
            std::cout << el << "\t";
         // Close the quote around the value before the separating comma;
         // previously the entry was written as `"name" = "value,`.
         log << "    \"" << *md << "\" = \"" << el << "\"," << std::endl;
         ++md;
      }
   }

   void
   writeTitle( const String & title )
   {
@@ -178,7 +218,7 @@ public:
            std::cout << std::setw( 20 ) << it.second;
         }
         // spanning element is printed as usual column to stdout
         std::cout << std::setw( 15 ) << spanningElement;
         //std::cout << std::setw( 15 ) << spanningElement;
         for( auto & it : subElements ) {
            std::cout << std::setw( 15 ) << it;
         }
@@ -279,6 +319,9 @@ protected:
   MetadataColumns metadataColumns;
   bool header_changed = true;
   std::vector< std::pair< String, int > > horizontalGroups;

   // new JSON implementation
   LogsMetadata logsMetadata;
};

} // namespace Benchmarks
+15 −0
Original line number Diff line number Diff line
@@ -81,6 +81,8 @@ public:
   using MetadataMap = std::map< const char*, String >;
   using MetadataColumns = std::vector<MetadataElement>;

   using CommonLogs = std::vector< std::pair< const char*, String > >;

   using HeaderElements = std::vector< String >;
   using RowElements = LoggingRowElements;

@@ -102,6 +104,19 @@ public:
      log << ": title = " << title << std::endl;
   }

   // Echoes key/value pairs shared by all rows of the benchmark.
   //
   // Every pair is printed to stdout as `key = value` when verbose output is
   // enabled; this implementation writes nothing to the log stream.
   //
   // logs - list of (name, value) pairs to print.
   void addCommonLogs( const CommonLogs& logs )
   {
      // The loop variable is deliberately not called `log`: that name is used
      // for the log stream in this class and would be shadowed here. Binding
      // by const reference also avoids copying each pair.
      for( const auto& entry : logs )
      {
         if( verbose )
            std::cout << entry.first << " = " << entry.second << std::endl;
      }
   }

   // Empty implementation: the argument is ignored.
   void addLogsMetadata( const std::vector< String >& md )
   {
   }

   // Empty implementation: nothing is printed or logged.
   void writeHeader()
   {
   }

   void
   writeMetadata( const MetadataMap & metadata )
   {
+21 −7
Original line number Diff line number Diff line
@@ -17,9 +17,10 @@ namespace Benchmarks {

template< typename Real,
          typename Device,
          typename Index >
          typename Index,
          typename Logger = JsonLogging >
struct SpmvBenchmarkResult
: public BenchmarkResult
: public BenchmarkResult< Logger >
{
   using RealType = Real;
   using DeviceType = Device;
@@ -27,23 +28,35 @@ struct SpmvBenchmarkResult
   using HostVector = Containers::Vector< Real, Devices::Host, Index >;
   using BenchmarkVector = Containers::Vector< Real, Device, Index >;

   SpmvBenchmarkResult( const HostVector& csrResult,
   using typename Logger::HeaderElements;
   using typename Logger::RowElements;
   using BenchmarkResult< Logger >::stddev;
   using BenchmarkResult< Logger >::bandwidth;
   using BenchmarkResult< Logger >::speedup;


   SpmvBenchmarkResult( const String& format,
                        const HostVector& csrResult,
                        const BenchmarkVector& benchmarkResult,
                        const IndexType nonzeros )
   : csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){};
   : format( format ), csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){};

   virtual HeaderElements getTableHeader() const override
   {
      return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
      return HeaderElements( {"format", "device", "non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
   }

   // Replaces the stored format label with the given one.
   void setFormat( const String& format )
   {
      this->format = format;
   }

   virtual RowElements getRowElements() const override
   {
      HostVector benchmarkResultCopy;
      benchmarkResultCopy = benchmarkResult;
      auto diff = csrResult - benchmarkResultCopy;
      RowElements elements;
      elements << nonzeros << time << stddev << stddev/time << bandwidth;
      elements << format
               << ( std::is_same< Device, Devices::Host >::value ? "CPU" : "GPU" )
               << nonzeros << time << stddev << stddev/time << bandwidth;
      if( speedup != 0.0 )
         elements << speedup;
      else elements << "N/A";
@@ -51,6 +64,7 @@ struct SpmvBenchmarkResult
      return elements;
   }

   String format;
   const HostVector& csrResult;
   const BenchmarkVector& benchmarkResult;
   const IndexType nonzeros;
+40 −26
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <cstdint>

#include "../Benchmarks.h"
#include "../JsonLogging.h"
#include "SpmvBenchmarkResult.h"

#include <TNL/Pointers/DevicePointer.h>
@@ -58,7 +59,9 @@ using namespace TNL::Matrices;

namespace TNL {
   namespace Benchmarks {
      namespace SpMVLegacy {
      namespace SpMV {

using BenchmarkType = TNL::Benchmarks::Benchmark< JsonLogging >;

/////
// General sparse matrix aliases
@@ -218,7 +221,7 @@ std::string getFormatShort( const Matrix& matrix )
}

// Print information about the matrix.
template< typename Matrix >
/*template< typename Matrix >
void printMatrixInfo( const Matrix& matrix,
                      std::ostream& str )
{
@@ -226,13 +229,13 @@ void printMatrixInfo( const Matrix& matrix,
    str << " Rows: " << matrix.getRows() << std::endl;
    str << " Cols: " << matrix.getColumns() << std::endl;
    str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
}
}*/

template< typename Real,
          template< typename, typename, typename > class Matrix,
          template< typename, typename, typename, typename > class Vector = Containers::Vector >
void
benchmarkSpMVLegacy( Benchmark<>& benchmark,
benchmarkSpMVLegacy( BenchmarkType& benchmark,
                     const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
                     const String& inputFileName,
                     bool verboseMR )
@@ -247,12 +250,12 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark,

   SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );

   benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
   /*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "rows", convertToString( hostMatrix.getRows() ) },
         { "columns", convertToString( hostMatrix.getColumns() ) },
         { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
      } ));
      } ));*/
   const int elements = hostMatrix.getNonzeroElementsCount();
   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
   benchmark.setOperation( datasetSize );
@@ -271,7 +274,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark,
      hostMatrix.vectorProduct( hostInVector, hostOutVector );

   };
   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );

   /////
@@ -289,7 +292,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark,
   auto spmvCuda = [&]() {
      cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
   };
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
 #endif
    std::cout << std::endl;
@@ -300,7 +303,7 @@ template< typename Real,
          template< typename, typename, typename > class Matrix,
          template< typename, typename, typename, typename > class Vector = Containers::Vector >
void
benchmarkSpMV( Benchmark<>& benchmark,
benchmarkSpMV( BenchmarkType& benchmark,
               const InputMatrix& inputMatrix,
               const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
               const String& inputFileName,
@@ -322,7 +325,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
      return;
   }

   benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
   benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "rows", convertToString( hostMatrix.getRows() ) },
         { "columns", convertToString( hostMatrix.getColumns() ) },
@@ -346,7 +349,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
      hostMatrix.vectorProduct( hostInVector, hostOutVector );

   };
   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );

   /////
@@ -365,7 +368,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
   auto spmvCuda = [&]() {
      cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
   };
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
 #endif
    std::cout << std::endl;
@@ -374,7 +377,7 @@ benchmarkSpMV( Benchmark<>& benchmark,
template< typename Real = double,
          typename Index = int >
void
benchmarkSpmv( Benchmark<>& benchmark,
benchmarkSpmv( BenchmarkType& benchmark,
               const String& inputFileName,
               const Config::ParameterContainer& parameters,
               bool verboseMR )
@@ -417,12 +420,17 @@ benchmarkSpmv( Benchmark<>& benchmark,
   ////
   // Perform benchmark on host with CSR as a reference CPU format
   //
   benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
   benchmark.addCommonLogs( BenchmarkType::CommonLogs( {
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) } } ) );

   /*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "rows", convertToString( csrHostMatrix.getRows() ) },
         { "columns", convertToString( csrHostMatrix.getColumns() ) },
         { "matrix format", String( "CSR" ) }
      } ));
      } ));*/

   HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );

@@ -435,19 +443,21 @@ benchmarkSpmv( Benchmark<>& benchmark,
       csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
   };

   SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults );
   SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader() );
   benchmark.writeHeader();
   benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults );

#ifdef HAVE_CUDA
   ////
   // Perform benchmark on CUDA device with cuSparse as a reference GPU format
   //
   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
   /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "rows", convertToString( csrHostMatrix.getRows() ) },
         { "columns", convertToString( csrHostMatrix.getColumns() ) },
         { "matrix format", String( "cuSparse" ) }
      } ));
      } ));*/

   cusparseHandle_t cusparseHandle;
   cusparseCreate( &cusparseHandle );
@@ -469,19 +479,20 @@ benchmarkSpmv( Benchmark<>& benchmark,
       cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector );
   };

   SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( String( "cusprase" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );

#ifdef HAVE_CSR5
   ////
   // Perform benchmark on CUDA device with CSR5 as a reference GPU format
   //
   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
   cudaBenchmarkResults.setFormat( String( "CSR5" ) );
   /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) },
      { "matrix format", String( "CSR5" ) }
   } ));
   } ));*/

   CudaVector cudaOutVector2( cudaOutVector );
   CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector );
@@ -489,6 +500,7 @@ benchmarkSpmv( Benchmark<>& benchmark,
   auto csr5SpMV = [&]() {
       csr5Benchmark.vectorProduct();
   };

   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults );
   std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
   csrCudaMatrix.reset();
@@ -497,12 +509,13 @@ benchmarkSpmv( Benchmark<>& benchmark,
   ////
   // Perform benchmark on CUDA device with LightSpMV as a reference GPU format
   //
   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
   cudaBenchmarkResults.setFormat( String( "LightSpMV Vector" ) );
   /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) },
      { "matrix format", String( "LightSpMV Vector" ) }
   } ));
   } ));*/

   LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix;
   lightSpMVCSRHostMatrix = csrHostMatrix;
@@ -516,12 +529,13 @@ benchmarkSpmv( Benchmark<>& benchmark,
   };
   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );

   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
   cudaBenchmarkResults.setFormat( String( "LightSpMV Warp" ) );
   /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) },
      { "matrix format", String( "LightSpMV Warp" ) }
   } ));
   } ));*/
   lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp );
   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
#endif
Loading