// Patch summary (preamble only; the patch text below is unchanged).
//
// 1. src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
//    Adds LightSpMVBenchmark::setKernelType( LightSpMVBenchmarkKernelType type ), a setter
//    that stores its argument in this->kernelType.
//
// 2. src/Benchmarks/SpMV/spmv.h
//    - Un-comments the WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES, _SYMMETRIC_MATRICES and
//      _LEGACY_FORMATS defines. The adjacent comment says these can be commented out to
//      turn off groups of SpMV benchmarks and speed up the compilation.
//    - Renames benchmarkSpmvSynthetic to benchmarkSpmv; the parameter list is the same,
//      only re-aligned.
//    - Renames the local cusparseBenchmarkResults to cudaBenchmarkResults. The same object
//      is passed to the spmvCusparse, csr5SpMV and spmvLightSpMV timing calls.
//    - Changes the "matrix format" metadata of the existing LightSpMV run from "LightSpMV"
//      to "LightSpMV Vector", then adds a second timed run labelled "LightSpMV Warp" that
//      first calls setKernelType( LightSpMVBenchmarkKernelWarp ) and reuses the
//      resetLightSpMVVectors and spmvLightSpMV lambdas.
//
// 3. src/Benchmarks/SpMV/tnl-benchmark-spmv.h
//    Updates the call in runSpMVBenchmarks to the renamed SpMVLegacy::benchmarkSpmv< Real >.
//
// NOTE(review): the result object is built as
//    SpmvBenchmarkResult< Real, Devices::Host, int >( hostOutVector, hostOutVector, ... ),
//    i.e. the same vector is passed twice. This is unchanged by the patch apart from the
//    rename; confirm that it is intended.
// NOTE(review): the first run is now labelled "LightSpMV Vector", but the kernel actually
//    used depends on the default value of kernelType, which is not visible in this patch.
//    Confirm that the default is the vector kernel.
//
diff --git a/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h b/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h index 221fc627468994638bdc34c22bac19e64ae45fe5..7d6ffde49960a1e53b563222af8ab4861accba0f 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h @@ -126,6 +126,11 @@ struct LightSpMVBenchmark #endif } + void setKernelType( LightSpMVBenchmarkKernelType type ) + { + this->kernelType = type; + } + void resetVectors() { this->inVectorView = 1.0; diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index d07a0f6bc6b3933a764143a421561e9c2b4f9eb7..991c6b56ca434093fa283296eba04272b98deaf6 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -39,9 +39,9 @@ #include <TNL/Algorithms/Segments/BiEllpack.h> // Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation -//#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES -//#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES -//#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS +#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES +#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES +#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS // Uncomment the following line to enable benchmarking the sandbox sparse matrix. //#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX @@ -374,10 +374,10 @@ benchmarkSpMV( Benchmark& benchmark, template< typename Real = double, typename Index = int > void -benchmarkSpmvSynthetic( Benchmark& benchmark, - const String& inputFileName, - const Config::ParameterContainer& parameters, - bool verboseMR ) +benchmarkSpmv( Benchmark& benchmark, + const String& inputFileName, + const Config::ParameterContainer& parameters, + bool verboseMR ) { // The following is another workaround because of a bug in nvcc versions 10 and 11. 
// If we use the current matrix formats, not the legacy ones, we get @@ -469,8 +469,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; - SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); - benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults ); + SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); + benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); #ifdef HAVE_CSR5 //// @@ -489,7 +489,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, auto csr5SpMV = [&]() { csr5Benchmark.vectorProduct(); }; - benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults ); + benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults ); std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl; csrCudaMatrix.reset(); #endif @@ -501,7 +501,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, - { "matrix format", String( "LightSpMV" ) } + { "matrix format", String( "LightSpMV Vector" ) } } )); LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix; @@ -514,7 +514,16 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, auto spmvLightSpMV = [&]() { lightSpMVBenchmark.vectorProduct(); }; - benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cusparseBenchmarkResults ); + benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults ); + + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "matrix name", 
convertToString( inputFileName ) }, + { "rows", convertToString( csrHostMatrix.getRows() ) }, + { "columns", convertToString( csrHostMatrix.getColumns() ) }, + { "matrix format", String( "LightSpMV Warp" ) } + } )); + lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp ); + benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults ); #endif csrHostMatrix.reset(); diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index 9a5005de73d06fb3d99709f89a32d8036722cac3..026ed356d67a3e997b032b303d3c7dbdb1e329dc 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -45,7 +45,7 @@ runSpMVBenchmarks( Benchmark & benchmark, metadata ); // Start the actual benchmark in spmv.h try { - SpMVLegacy::benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, parameters, verboseMR ); + SpMVLegacy::benchmarkSpmv< Real >( benchmark, inputFileName, parameters, verboseMR ); } catch( const std::exception& ex ) { std::cerr << ex.what() << std::endl;