Commit 92bb20c0 authored by Tomáš Oberhuber, committed by Jakub Klinkovský
Browse files

Added LightSpMV Warp kernel benchmark.

parent 751c3d1c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -126,6 +126,11 @@ struct LightSpMVBenchmark
#endif
   }

   /**
    * \brief Stores the requested kernel type in this benchmark object.
    *
    * The method only assigns the \e kernelType member; it performs no other work.
    *
    * \param type is the kernel type to remember.
    */
   void setKernelType( LightSpMVBenchmarkKernelType type )
   {
      // NOTE(review): presumably read by a later vectorProduct() call to pick the kernel -- confirm.
      this->kernelType = type;
   }

   void resetVectors()
   {
      this->inVectorView = 1.0;
+21 −12
Original line number Diff line number Diff line
@@ -39,9 +39,9 @@
#include <TNL/Algorithms/Segments/BiEllpack.h>

// Comment out the following defines to turn off some groups of SpMV benchmarks and speed up the compilation
//#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
//#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
//#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS

// Uncomment the following line to enable benchmarking the sandbox sparse matrix.
//#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
@@ -374,7 +374,7 @@ benchmarkSpMV( Benchmark& benchmark,
template< typename Real = double,
          typename Index = int >
void
benchmarkSpmvSynthetic( Benchmark& benchmark,
benchmarkSpmv( Benchmark& benchmark,
               const String& inputFileName,
               const Config::ParameterContainer& parameters,
               bool verboseMR )
@@ -469,8 +469,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
       cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector );
   };

   SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
   SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );

#ifdef HAVE_CSR5
   ////
@@ -489,7 +489,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
   auto csr5SpMV = [&]() {
       csr5Benchmark.vectorProduct();
   };
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults );
   std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
   csrCudaMatrix.reset();
#endif
@@ -501,7 +501,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) },
      { "matrix format", String( "LightSpMV" ) }
      { "matrix format", String( "LightSpMV Vector" ) }
   } ));

   LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix;
@@ -514,7 +514,16 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
   auto spmvLightSpMV = [&]() {
       lightSpMVBenchmark.vectorProduct();
   };
   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cusparseBenchmarkResults );
   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );

   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) },
      { "matrix format", String( "LightSpMV Warp" ) }
   } ));
   lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp );
   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
#endif
   csrHostMatrix.reset();

+1 −1
Original line number Diff line number Diff line
@@ -45,7 +45,7 @@ runSpMVBenchmarks( Benchmark & benchmark,
                           metadata );
   // Start the actual benchmark in spmv.h
   try {
      SpMVLegacy::benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, parameters, verboseMR );
      SpMVLegacy::benchmarkSpmv< Real >( benchmark, inputFileName, parameters, verboseMR );
   }
   catch( const std::exception& ex ) {
      std::cerr << ex.what() << std::endl;