Rewritten tnl-benchmark-linear-solvers

616f0cbe · Jakub Klinkovský · 51d16ef0 · 616f0cbe · 616f0cbe · 616f0cbe
Commit 616f0cbe authored 6 years ago by Jakub Klinkovský
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -16,6 +16,8 @@
 #include <iomanip>
 #include <map>
 #include <vector>
+#include <exception>
+#include <limits>

 #include <TNL/Timer.h>
 #include <TNL/String.h>
@@ -310,7 +312,6 @@ public:
   {
      closeTable();
      writeTitle( title );
-      monitor.setStage( title.getString() );
   }

   // Marks the start of a new benchmark (with custom metadata)
@@ -320,7 +321,6 @@ public:
   {
      closeTable();
      writeTitle( title );
-      monitor.setStage( title.getString() );
      // add loops to metadata
      metadata["loops"] = String(loops);
      writeMetadata( metadata );
@@ -347,6 +347,7 @@ public:
                 const double datasetSize = 0.0, // in GB
                 const double baseTime = 0.0 )
   {
+      monitor.setStage( operation.getString() );
      if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) {
         metadataColumns[ 0 ].second = operation;
      }
@@ -401,13 +402,19 @@ public:
         ComputeFunction & compute )
   {
      double time;
-      if( verbose ) {
-         // run the monitor main loop
-         Solvers::SolverMonitorThread monitor_thread( monitor );
-         time = timeFunction( compute, reset, loops, monitor );
+      try {
+         if( verbose ) {
+            // run the monitor main loop
+            Solvers::SolverMonitorThread monitor_thread( monitor );
+            time = timeFunction( compute, reset, loops, monitor );
+         }
+         else {
+            time = timeFunction( compute, reset, loops, monitor );
+         }
      }
-      else {
-         time = timeFunction( compute, reset, loops, monitor );
+      catch ( const std::exception& e ) {
+         std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl;
+         time = std::numeric_limits<double>::quiet_NaN();
      }

      const double bandwidth = datasetSize / time;
@@ -450,6 +457,12 @@ public:

   using Logging::save;

+   Solvers::IterativeSolverMonitor< double, int >&
+   getMonitor()
+   {
+      return monitor;
+   }
+
 protected:
   int loops;
   double datasetSize = 0.0;

--- a/src/Benchmarks/LinearSolvers/CMakeLists.txt
+++ b/src/Benchmarks/LinearSolvers/CMakeLists.txt
 if( BUILD_CUDA )
-    CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
-else()
-    ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
+   CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers-cuda tnl-benchmark-linear-solvers.cu
+                        OPTIONS -DHAVE_CUSPARSE )
+   TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers-cuda tnl ${CUDA_cusparse_LIBRARY} )
+
+   install( TARGETS tnl-benchmark-linear-solvers-cuda RUNTIME DESTINATION bin )
 endif()

+ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
+TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
+
 install( TARGETS tnl-benchmark-linear-solvers RUNTIME DESTINATION bin )
--- a/src/Benchmarks/LinearSolvers/CuSolverWrapper.h
+++ b/src/Benchmarks/LinearSolvers/CuSolverWrapper.h
+#pragma once
+
+#ifdef HAVE_CUDA
+
+#include <cusolverSp.h>
+
+#include <TNL/Config/ConfigDescription.h>
+#include <TNL/Matrices/CSR.h>
+#include <TNL/Solvers/Linear/LinearSolver.h>
+
+
+template< typename Matrix >
+struct is_csr_matrix
+{
+    static constexpr bool value = false;
+};
+
+template< typename Real, typename Device, typename Index >
+struct is_csr_matrix< TNL::Matrices::CSR< Real, Device, Index > >
+{
+    static constexpr bool value = true;
+};
+
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+
+template< typename Matrix >
+class CuSolverWrapper
+    : public LinearSolver< Matrix >
+{
+    static_assert( ::is_csr_matrix< Matrix >::value, "The CuSolverWrapper solver is available only for CSR matrices." );
+    static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "CuSolverWrapper is available only on CUDA" );
+    static_assert( std::is_same< typename Matrix::RealType, float >::value ||
+                   std::is_same< typename Matrix::RealType, double >::value, "unsupported RealType" );
+    static_assert( std::is_same< typename Matrix::IndexType, int >::value, "unsupported IndexType" );
+
+    using Base = LinearSolver< Matrices::CSR< double, Devices::Cuda, int > >;
+public:
+    using RealType = typename Base::RealType;
+    using DeviceType = typename Base::DeviceType;
+    using IndexType = typename Base::IndexType;
+    using VectorViewType = typename Base::VectorViewType;
+    using ConstVectorViewType = typename Base::ConstVectorViewType;
+
+    bool solve( ConstVectorViewType b, VectorViewType x ) override
+    {
+        TNL_ASSERT_EQ( this->matrix->getRows(), this->matrix->getColumns(), "matrix must be square" );
+        TNL_ASSERT_EQ( this->matrix->getColumns(), x.getSize(), "wrong size of the solution vector" );
+        TNL_ASSERT_EQ( this->matrix->getColumns(), b.getSize(), "wrong size of the right hand side" );
+
+        const IndexType size = this->matrix->getRows();
+
+        this->resetIterations();
+        this->setResidue( this->getConvergenceResidue() + 1.0 );
+
+        cusolverSpHandle_t handle;
+        cusolverStatus_t status;
+        cusparseStatus_t cusparse_status;
+        cusparseMatDescr_t mat_descr;
+
+        status = cusolverSpCreate( &handle );
+        if( status != CUSOLVER_STATUS_SUCCESS ) {
+            std::cerr << "cusolverSpCreate failed: " << status << std::endl;
+            return false;
+        }
+
+        cusparse_status = cusparseCreateMatDescr( &mat_descr );
+        if( cusparse_status != CUSPARSE_STATUS_SUCCESS ) {
+            std::cerr << "cusparseCreateMatDescr failed: " << cusparse_status << std::endl;
+            return false;
+        }
+
+        cusparseSetMatType( mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
+        cusparseSetMatIndexBase( mat_descr, CUSPARSE_INDEX_BASE_ZERO );
+
+        const RealType tol = 1e-16;
+        const int reorder = 0;
+        int singularity = 0;
+
+        if( std::is_same< typename Matrix::RealType, float >::value )
+        {
+            status = cusolverSpScsrlsvqr( handle,
+                                          size,
+                                          this->matrix->getValues().getSize(),
+                                          mat_descr,
+                                          (const float*) this->matrix->getValues().getData(),
+                                          this->matrix->getRowPointers().getData(),
+                                          this->matrix->getColumnIndexes().getData(),
+                                          (const float*) b.getData(),
+                                          tol,
+                                          reorder,
+                                          (float*) x.getData(),
+                                          &singularity );
+
+            if( status != CUSOLVER_STATUS_SUCCESS ) {
+                std::cerr << "cusolverSpScsrlsvqr failed: " << status << std::endl;
+                return false;
+            }
+        }
+
+        if( std::is_same< typename Matrix::RealType, double >::value )
+        {
+            status = cusolverSpDcsrlsvqr( handle,
+                                          size,
+                                          this->matrix->getValues().getSize(),
+                                          mat_descr,
+                                          (const double*) this->matrix->getValues().getData(),
+                                          this->matrix->getRowPointers().getData(),
+                                          this->matrix->getColumnIndexes().getData(),
+                                          (const double*) b.getData(),
+                                          tol,
+                                          reorder,
+                                          (double*) x.getData(),
+                                          &singularity );
+
+            if( status != CUSOLVER_STATUS_SUCCESS ) {
+                std::cerr << "cusolverSpDcsrlsvqr failed: " << status << std::endl;
+                return false;
+            }
+        }
+
+        cusparseDestroyMatDescr( mat_descr );
+        cusolverSpDestroy( handle );
+
+        return true;
+    }
+};
+
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
+
+#endif
--- a/src/Benchmarks/LinearSolvers/benchmarks.h
+++ b/src/Benchmarks/LinearSolvers/benchmarks.h
+#pragma once
+
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Config/ParameterContainer.h>
+#include <TNL/Solvers/IterativeSolverMonitor.h>
+#include <TNL/DistributedContainers/DistributedMatrix.h>
+
+#include "../Benchmarks.h"
+
+#ifdef HAVE_ARMADILLO
+#include <armadillo>
+#include <TNL/Matrices/CSR.h>
+#endif
+
+#include <stdexcept>  // std::runtime_error
+
+using namespace TNL;
+using namespace TNL::Pointers;
+using namespace TNL::Benchmarks;
+
+
+template< typename Device >
+const char*
+getPerformer()
+{
+   if( std::is_same< Device, Devices::Cuda >::value )
+      return "GPU";
+   return "CPU";
+}
+
+template< typename Matrix >
+void barrier( const Matrix& matrix )
+{
+}
+
+template< typename Matrix, typename Communicator >
+void barrier( const DistributedContainers::DistributedMatrix< Matrix, Communicator >& matrix )
+{
+   Communicator::Barrier( matrix.getCommunicationGroup() );
+}
+
+template< template<typename> class Preconditioner, typename Matrix >
+void
+benchmarkPreconditionerUpdate( Benchmark& benchmark,
+                               const Config::ParameterContainer& parameters,
+                               const SharedPointer< Matrix >& matrix )
+{
+   barrier( matrix );
+   const char* performer = getPerformer< typename Matrix::DeviceType >();
+   Preconditioner< Matrix > preconditioner;
+
+   auto reset = []() {};
+   auto compute = [&]() {
+      preconditioner.update( matrix );
+      barrier( matrix );
+   };
+
+   benchmark.time( reset, performer, compute );
+}
+
+template< template<typename> class Solver, template<typename> class Preconditioner, typename Matrix, typename Vector >
+void
+benchmarkSolver( Benchmark& benchmark,
+                 const Config::ParameterContainer& parameters,
+                 const SharedPointer< Matrix >& matrix,
+                 const Vector& x0,
+                 const Vector& b )
+{
+   barrier( matrix );
+   const char* performer = getPerformer< typename Matrix::DeviceType >();
+
+   // setup
+   Solver< Matrix > solver;
+   solver.setup( parameters );
+   solver.setMatrix( matrix );
+
+   // FIXME: getMonitor returns solver monitor specialized for double and int
+   solver.setSolverMonitor( benchmark.getMonitor() );
+
+   auto pre = std::make_shared< Preconditioner< Matrix > >();
+   solver.setPreconditioner( pre );
+   // preconditioner update may throw if it's not implemented for CUDA
+   try {
+      pre->update( matrix );
+   }
+   catch ( const std::runtime_error& ) {}
+
+   Vector x;
+   x.setLike( x0 );
+
+   // reset function
+   auto reset = [&]() {
+      x = x0;
+   };
+
+   // benchmark function
+   auto compute = [&]() {
+      const bool converged = solver.solve( b, x );
+      barrier( matrix );
+      if( ! converged )
+         throw std::runtime_error("solver did not converge");
+   };
+
+   benchmark.time( reset, performer, compute );
+
+   // TODO: add extra columns to the benchmark (iterations, preconditioned residue, true residue)
+//   Vector r;
+//   r.setLike( x );
+//   matrix->vectorProduct( x, r );
+//   r.addVector( b, 1.0, -1.0 );
+//   const double trueResidue = r.lpNorm( 2.0 ) / b.lpNorm( 2.0 );
+
+//    std::cout << "Converged: " << solver.checkConvergence() << ", iterations = " << solver.getIterations() << ", residue = " << solver.getResidue() << "    " << std::endl;
+//    std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
+//    std::cout << outputPrefix
+//              << "converged: "   << std::setw( 5) << std::boolalpha << ( ! std::isnan(solver.getResidue()) && solver.getResidue() < solver.getConvergenceResidue() ) << "   "
+//              << "iterations = " << std::setw( 4) << solver.getIterations() << "   "
+//              << "preconditioned residue = " << std::setw(10) << solver.getResidue() << "   "
+//              << "true residue = " << std::setw(10) << trueResidue << "   "
+//              << "mean time = "  << std::setw( 9) << time / loops << " seconds." << std::endl;
+}
+
+#ifdef HAVE_ARMADILLO
+// TODO: make a TNL solver like UmfpackWrapper
+template< typename Vector >
+void
+benchmarkArmadillo( const Config::ParameterContainer& parameters,
+                    const Pointers::SharedPointer< Matrices::CSR< double, Devices::Host, int > >& matrix,
+                    const Vector& x0,
+                    const Vector& b )
+{
+    using namespace TNL;
+
+    // copy matrix into Armadillo's class
+    // sp_mat is in CSC format
+    arma::uvec _colptr( matrix->getRowPointers().getSize() );
+    for( int i = 0; i < matrix->getRowPointers().getSize(); i++ )
+        _colptr[ i ] = matrix->getRowPointers()[ i ];
+    arma::uvec _rowind( matrix->getColumnIndexes().getSize() );
+    for( int i = 0; i < matrix->getColumnIndexes().getSize(); i++ )
+        _rowind[ i ] = matrix->getColumnIndexes()[ i ];
+    arma::vec _values( matrix->getValues().getData(), matrix->getValues().getSize() );
+    arma::sp_mat AT( _rowind,
+                     _colptr,
+                     _values,
+                     matrix->getColumns(),
+                     matrix->getRows() );
+    arma::sp_mat A = AT.t();
+
+    Vector x;
+    x.setLike( x0 );
+
+    // Armadillo vector using the same memory as x (with copy_aux_mem=false, strict=true)
+    arma::vec arma_x( x.getData(), x.getSize(), false, true );
+    arma::vec arma_b( b.getData(), b.getSize() );
+
+    arma::superlu_opts settings;
+//    settings.equilibrate = false;
+    settings.equilibrate = true;
+    settings.pivot_thresh = 1.0;
+//    settings.permutation = arma::superlu_opts::COLAMD;
+    settings.permutation = arma::superlu_opts::MMD_AT_PLUS_A;
+//    settings.refine = arma::superlu_opts::REF_DOUBLE;
+    settings.refine = arma::superlu_opts::REF_NONE;
+
+    // reset function
+    auto reset = [&]() {
+        x = x0;
+    };
+
+    // benchmark function
+    auto compute = [&]() {
+        const bool converged = arma::spsolve( arma_x, A, arma_b, "superlu", settings );
+        if( ! converged )
+            throw std::runtime_error("solver did not converge");
+    };
+
+    const int loops = parameters.getParameter< int >( "loops" );
+    double time = timeFunction( compute, reset, loops );
+
+    arma::vec r = A * arma_x - arma_b;
+//    std::cout << "Converged: " << (time > 0) << ", residue = " << arma::norm( r ) / arma::norm( arma_b ) << "                                                 " << std::endl;
+//    std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
+    std::cout << "Converged: "   << std::setw( 5) << std::boolalpha << (time > 0) << "   "
+              << "iterations = " << std::setw( 4) << "N/A" << "   "
+              << "residue = "    << std::setw(10) << arma::norm( r ) / arma::norm( arma_b ) << "   "
+              << "mean time = "  << std::setw( 9) << time / loops << " seconds." << std::endl;
+}
+#endif
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp
-/***************************************************************************
-                          tnl-benchmark-linear-solvers.cpp  -  description
-                             -------------------
-    begin                : Jun 22, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
 #include "tnl-benchmark-linear-solvers.h"
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cu
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cu
-/***************************************************************************
-                          tnl-benchmark-linear-solvers.cu  -  description
-                             -------------------
-    begin                : Jun 22, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
 #include "tnl-benchmark-linear-solvers.h"
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h