From 54ef7bad8ae01403577497e1e34200155d4d8cc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 4 Feb 2021 15:17:05 +0100
Subject: [PATCH] Added symmetric sparse matrices to SpMV benchmark.
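
The SpMV benchmark header is renamed from spmv-legacy.h to spmv.h and extended:

- New SymmetricSparseMatrix_* aliases combine Matrices::SymmetricMatrix with the
  CSR (Scalar, Vector, Hybrid, Adaptive), Ellpack, SlicedEllpack, ChunkedEllpack
  and BiEllpack segments.
- A new benchmarkSpMV() converts an already loaded input matrix to the target
  format instead of re-reading the input file for every format, and skips the
  format when the conversion throws.
- When the 'with-symmetric-matrices' parameter is enabled, the matrix is read
  with symmetric storage, compared with the general matrix read from the same
  file, and benchmarked in all symmetric formats.
- MatrixInfo::getFormat() prefixes the segments type with "Symmetric" for
  symmetric matrix types, so both variants can be told apart in the benchmark
  output.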

---
 src/Benchmarks/SpMV/{spmv-legacy.h => spmv.h} | 216 +++++++++++++++---
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h      |   2 +-
 src/TNL/Matrices/MatrixInfo.h                 |   8 +-
 3 files changed, 193 insertions(+), 33 deletions(-)
 rename src/Benchmarks/SpMV/{spmv-legacy.h => spmv.h} (65%)
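
Notes (kept below the "---" marker, so they are not part of the commit message):

The consistency check added to benchmarkSpmvSynthetic() boils down to the sketch
below. It is illustration only: the helper name, the double/int instantiation and
the exact include paths are assumptions, while MatrixReader::readMtx() and the
operator!= comparison between the general and the symmetric matrix are used
exactly as in spmv.h.

   #include <TNL/Matrices/SparseMatrix.h>
   #include <TNL/Matrices/MatrixReader.h>
   #include <TNL/Devices/Host.h>
   #include <TNL/String.h>

   using GeneralInputMatrix   = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int >;
   using SymmetricInputMatrix = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int,
                                                             TNL::Matrices::SymmetricMatrix >;

   // Read the same .mtx file once with symmetric storage and once as a general
   // matrix; the symmetric SpMV formats are meaningful only when the two agree.
   inline bool symmetricReadMatchesGeneral( const TNL::String& fileName, bool verbose )
   {
      SymmetricInputMatrix symmetricHostMatrix;
      GeneralInputMatrix hostMatrix;
      TNL::Matrices::MatrixReader< SymmetricInputMatrix >::readMtx( fileName, symmetricHostMatrix, verbose );
      TNL::Matrices::MatrixReader< GeneralInputMatrix >::readMtx( fileName, hostMatrix, verbose );
      return ! ( hostMatrix != symmetricHostMatrix );   // same comparison as in spmv.h
   }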

diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv.h
similarity index 65%
rename from src/Benchmarks/SpMV/spmv-legacy.h
rename to src/Benchmarks/SpMV/spmv.h
index 7c7e19d807..b010303672 100644
--- a/src/Benchmarks/SpMV/spmv-legacy.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -44,11 +44,9 @@ namespace TNL {
    namespace Benchmarks {
       namespace SpMVLegacy {
 
-// Alias to match the number of template parameters with other formats
-template< typename Real, typename Device, typename Index >
-using SlicedEllpackAlias = Benchmarks::SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >;
-
-// Segments based sparse matrix aliases
+/////
+// General sparse matrix aliases
+//
 template< typename Real, typename Device, typename Index >
 using SparseMatrix_CSR_Scalar = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSRScalar >;
 
@@ -85,7 +83,49 @@ using BiEllpackSegments = Algorithms::Segments::BiEllpack< Device, Index, IndexA
 template< typename Real, typename Device, typename Index >
 using SparseMatrix_BiEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, BiEllpackSegments >;
 
+/////
+// Symmetric sparse matrix aliases
+//
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_CSR_Scalar = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, Algorithms::Segments::CSRScalar >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_CSR_Vector = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, Algorithms::Segments::CSRVector >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_CSR_Hybrid = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, Algorithms::Segments::CSRHybrid >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_CSR_Adaptive = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, Algorithms::Segments::CSRAdaptive >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = Algorithms::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, EllpackSegments >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, SlicedEllpackSegments >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using ChunkedEllpackSegments = Algorithms::Segments::ChunkedEllpack< Device, Index, IndexAllocator >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_ChunkedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, ChunkedEllpackSegments >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using BiEllpackSegments = Algorithms::Segments::BiEllpack< Device, Index, IndexAllocator >;
+
+template< typename Real, typename Device, typename Index >
+using SymmetricSparseMatrix_BiEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::SymmetricMatrix, BiEllpackSegments >;
+
+
+/////
 // Legacy formats
+//
 template< typename Real, typename Device, typename Index >
 using SparseMatrixLegacy_CSR_Scalar = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< Real, Device, Index, Benchmarks::SpMV::ReferenceFormats::Legacy::CSRScalar >;
 
@@ -119,6 +159,9 @@ using SparseMatrixLegacy_CSR_MultiVector = Benchmarks::SpMV::ReferenceFormats::L
 template< typename Real, typename Device, typename Index >
 using SparseMatrixLegacy_CSR_LightWithoutAtomic = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< Real, Device, Index, Benchmarks::SpMV::ReferenceFormats::Legacy::CSRLightWithoutAtomic >;
 
+template< typename Real, typename Device, typename Index >
+using SlicedEllpackAlias = Benchmarks::SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >;
+
 // Get the name (with extension) of input matrix file
 std::string getMatrixFileName( const String& InputFileName )
 {
@@ -186,7 +229,6 @@ benchmarkSpMVLegacy( Benchmark& benchmark,
 
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( inputFileName ) },
-         //{ "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) },
          { "rows", convertToString( hostMatrix.getRows() ) },
          { "columns", convertToString( hostMatrix.getColumns() ) },
          { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
@@ -195,9 +237,9 @@ benchmarkSpMVLegacy( Benchmark& benchmark,
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setOperation( datasetSize );
 
-   /***
-    * Benchmark SpMV on host
-    */
+   /////
+   // Benchmark SpMV on host
+   //
    HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() );
 
    auto resetHostVectors = [&]() {
@@ -212,9 +254,9 @@ benchmarkSpMVLegacy( Benchmark& benchmark,
    SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
    benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
 
-   /***
-    * Benchmark SpMV on CUDA
-    */
+   /////
+   // Benchmark SpMV on CUDA
+   //
 #ifdef HAVE_CUDA
    cudaMatrix = hostMatrix;
    CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() );
@@ -233,6 +275,82 @@ benchmarkSpMVLegacy( Benchmark& benchmark,
     std::cout << std::endl;
 }
 
+template< typename Real,
+          typename InputMatrix,
+          template< typename, typename, typename > class Matrix,
+          template< typename, typename, typename, typename > class Vector = Containers::Vector >
+void
+benchmarkSpMV( Benchmark& benchmark,
+               const InputMatrix& inputMatrix,
+               const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
+               const String& inputFileName,
+               bool verboseMR )
+{
+   using HostMatrix = Matrix< Real, TNL::Devices::Host, int >;
+   using CudaMatrix = Matrix< Real, TNL::Devices::Cuda, int >;
+   using HostVector = Containers::Vector< Real, Devices::Host, int >;
+   using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
+
+   HostMatrix hostMatrix;
+   try
+   {
+      hostMatrix = inputMatrix;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Unable to convert the matrix to the target format." << std::endl;
+      return;
+   }
+
+   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+         { "matrix name", convertToString( inputFileName ) },
+         { "rows", convertToString( hostMatrix.getRows() ) },
+         { "columns", convertToString( hostMatrix.getColumns() ) },
+         { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
+      } ));
+   const int elements = hostMatrix.getNonzeroElementsCount();
+   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   benchmark.setOperation( datasetSize );
+
+   /////
+   // Benchmark SpMV on host
+   //
+   HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() );
+
+   auto resetHostVectors = [&]() {
+      hostInVector = 1.0;
+      hostOutVector = 0.0;
+   };
+
+   auto spmvHost = [&]() {
+      hostMatrix.vectorProduct( hostInVector, hostOutVector );
+
+   };
+   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
+   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
+
+   /////
+   // Benchmark SpMV on CUDA
+   //
+#ifdef HAVE_CUDA
+   CudaMatrix cudaMatrix;
+   cudaMatrix = inputMatrix;
+   CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() );
+
+   auto resetCudaVectors = [&]() {
+      cudaInVector = 1.0;
+      cudaOutVector = 0.0;
+   };
+
+   auto spmvCuda = [&]() {
+      cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
+   };
+   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
+#endif
+   std::cout << std::endl;
+}
+
 template< typename Real = double,
           typename Index = int >
 void
@@ -247,7 +365,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    // It seems that there is a problem with lambda functions identification when we create
    // two instances of TNL::Matrices::SparseMatrix. The second one comes from calling of
    // `benchmarkSpMV< Real, SparseMatrix_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );`
-   // and simillar later in this function. Maybe splitting this function into two might help.
+   // and similar calls later in this function.
 #define USE_LEGACY_FORMATS
 #ifdef USE_LEGACY_FORMATS
    // Here we use 'int' instead of 'Index' because of compatibility with cusparse.
@@ -266,7 +384,6 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
 
    CSRHostMatrix csrHostMatrix;
-   CSRCudaMatrix csrCudaMatrix;
 
    ////
    // Set-up benchmark datasize
@@ -281,7 +398,6 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    //
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( inputFileName ) },
-         //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
          { "rows", convertToString( csrHostMatrix.getRows() ) },
          { "columns", convertToString( csrHostMatrix.getColumns() ) },
          { "matrix format", String( "CSR" ) }
@@ -307,7 +423,6 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
 #ifdef HAVE_CUDA
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( inputFileName ) },
-         //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
          { "rows", convertToString( csrHostMatrix.getRows() ) },
          { "columns", convertToString( csrHostMatrix.getColumns() ) },
          { "matrix format", String( "cuSparse" ) }
@@ -316,6 +431,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    cusparseHandle_t cusparseHandle;
    cusparseCreate( &cusparseHandle );
 
+   CSRCudaMatrix csrCudaMatrix;
    csrCudaMatrix = csrHostMatrix;
 
    // Delete the CSRhostMatrix, so it doesn't take up unnecessary space
@@ -337,25 +453,13 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
 
    SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
    benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
+   csrCudaMatrix.reset();
 #endif
-
-   /////
-   // Benchmarking TNL formats
-   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Scalar                   >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Vector                   >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Hybrid                   >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Adaptive                 >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_Ellpack                      >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_SlicedEllpack                >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_ChunkedEllpack               >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMVLegacy< Real, SparseMatrix_BiEllpack                    >( benchmark, hostOutVector, inputFileName, verboseMR );
-
-
-   const bool withSymmetricMatrices = parameters.getParameter< bool >("with-symmetric-matrices");
+   csrHostMatrix.reset();
 
    /////
    // Benchmarking of TNL legacy formats
+   //
    if( parameters.getParameter< bool >("with-legacy-matrices") )
    {
       using namespace Benchmarks::SpMV::ReferenceFormats;
@@ -371,12 +475,62 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
       benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_MultiVector        >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_LightWithoutAtomic >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMVLegacy< Real, Legacy::Ellpack                           >( benchmark, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMVLegacy< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMVLegacy< Real, Legacy::ChunkedEllpack                    >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMVLegacy< Real, Legacy::BiEllpack                         >( benchmark, hostOutVector, inputFileName, verboseMR );
    }
    /* AdEllpack is broken
    benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
     */
+
+   /////
+   // Benchmarking TNL formats
+   //
+   using HostMatrixType = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host >;
+   HostMatrixType hostMatrix;
+   TNL::Matrices::MatrixReader< HostMatrixType >::readMtx( inputFileName, hostMatrix, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Scalar                   >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Vector                   >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Hybrid                   >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Adaptive                 >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_Ellpack                      >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_SlicedEllpack                >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_ChunkedEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_BiEllpack                    >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   hostMatrix.reset();
+
+   /////
+   // Benchmarking symmetric sparse matrices
+   //
+   if( parameters.getParameter< bool >("with-symmetric-matrices") )
+   {
+      using SymmetricInputMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix >;
+      using InputMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int >;
+      SymmetricInputMatrix symmetricHostMatrix;
+      try
+      {
+         TNL::Matrices::MatrixReader< SymmetricInputMatrix >::readMtx( inputFileName, symmetricHostMatrix, verboseMR );
+      }
+      catch(const std::exception& e)
+      {
+         std::cerr << e.what() << " ... SKIPPING " << std::endl;
+         return;
+      }
+      InputMatrix hostMatrix;
+      TNL::Matrices::MatrixReader< InputMatrix >::readMtx( inputFileName, hostMatrix, verboseMR );
+      if( hostMatrix != symmetricHostMatrix )
+      {
+         std::cerr << "ERROR !!!!!! " << std::endl;
+      }
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Scalar                   >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Vector                   >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Hybrid                   >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Adaptive                 >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_Ellpack                      >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack                >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack               >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack                    >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, verboseMR );
+   }
 }
 
 } // namespace SpMVLegacy
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index d4ec93934d..9a5005de73 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -18,7 +18,7 @@
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Config/parseCommandLine.h>
 
-#include "spmv-legacy.h"
+#include "spmv.h"
 
 #include <TNL/Matrices/MatrixReader.h>
 using namespace TNL::Matrices;
diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h
index 7d28956163..d84afa39a1 100644
--- a/src/TNL/Matrices/MatrixInfo.h
+++ b/src/TNL/Matrices/MatrixInfo.h
@@ -64,7 +64,13 @@ struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsVi
 {
    static String getDensity() { return String( "sparse" ); };
 
-   static String getFormat() { return SegmentsView< Device, Index >::getSegmentsType(); };
+   static String getFormat()
+   {
+      if( MatrixType::isSymmetric() )
+         return TNL::String( "Symmetric " ) + SegmentsView< Device, Index >::getSegmentsType();
+      else
+         return SegmentsView< Device, Index >::getSegmentsType();
+   };
 };
 
 template< typename Real,
-- 
GitLab