Loading src/Benchmarks/Benchmarks.h +21 −8 Original line number Diff line number Diff line Loading @@ -16,6 +16,8 @@ #include <iomanip> #include <map> #include <vector> #include <exception> #include <limits> #include <TNL/Timer.h> #include <TNL/String.h> Loading Loading @@ -310,7 +312,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); } // Marks the start of a new benchmark (with custom metadata) Loading @@ -320,7 +321,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); // add loops to metadata metadata["loops"] = String(loops); writeMetadata( metadata ); Loading @@ -347,6 +347,7 @@ public: const double datasetSize = 0.0, // in GB const double baseTime = 0.0 ) { monitor.setStage( operation.getString() ); if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) { metadataColumns[ 0 ].second = operation; } Loading Loading @@ -401,6 +402,7 @@ public: ComputeFunction & compute ) { double time; try { if( verbose ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); Loading @@ -409,6 +411,11 @@ public: else { time = timeFunction( compute, reset, loops, monitor ); } } catch ( const std::exception& e ) { std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl; time = std::numeric_limits<double>::quiet_NaN(); } const double bandwidth = datasetSize / time; const double speedup = this->baseTime / time; Loading Loading @@ -450,6 +457,12 @@ public: using Logging::save; Solvers::IterativeSolverMonitor< double, int >& getMonitor() { return monitor; } protected: int loops; double datasetSize = 0.0; Loading src/Benchmarks/LinearSolvers/CMakeLists.txt +8 −5 Original line number Diff line number Diff line if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl ) else() CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers-cuda tnl-benchmark-linear-solvers.cu OPTIONS -DHAVE_CUSPARSE ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers-cuda tnl ${CUDA_cusparse_LIBRARY} ) install( TARGETS tnl-benchmark-linear-solvers-cuda RUNTIME DESTINATION bin ) endif() ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl ) endif() install( TARGETS tnl-benchmark-linear-solvers RUNTIME DESTINATION bin ) src/Benchmarks/LinearSolvers/CuSolverWrapper.h 0 → 100644 +135 −0 Original line number Diff line number Diff line #pragma once #ifdef HAVE_CUDA #include <cusolverSp.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Matrices/CSR.h> #include <TNL/Solvers/Linear/LinearSolver.h> template< typename Matrix > struct is_csr_matrix { static constexpr bool value = false; }; template< typename Real, typename Device, typename Index > struct is_csr_matrix< TNL::Matrices::CSR< Real, Device, Index > > { static constexpr bool value = true; }; namespace TNL { namespace Solvers { namespace Linear { template< typename Matrix > class CuSolverWrapper : public LinearSolver< Matrix > { static_assert( ::is_csr_matrix< Matrix >::value, "The CuSolverWrapper solver is available only for CSR matrices." ); static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "CuSolverWrapper is available only on CUDA" ); static_assert( std::is_same< typename Matrix::RealType, float >::value || std::is_same< typename Matrix::RealType, double >::value, "unsupported RealType" ); static_assert( std::is_same< typename Matrix::IndexType, int >::value, "unsupported IndexType" ); using Base = LinearSolver< Matrices::CSR< double, Devices::Cuda, int > >; public: using RealType = typename Base::RealType; using DeviceType = typename Base::DeviceType; using IndexType = typename Base::IndexType; using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; bool solve( ConstVectorViewType b, VectorViewType x ) override { TNL_ASSERT_EQ( this->matrix->getRows(), this->matrix->getColumns(), "matrix must be square" ); TNL_ASSERT_EQ( this->matrix->getColumns(), x.getSize(), "wrong size of the solution vector" ); TNL_ASSERT_EQ( this->matrix->getColumns(), b.getSize(), "wrong size of the right hand side" ); const IndexType size = this->matrix->getRows(); this->resetIterations(); this->setResidue( this->getConvergenceResidue() + 1.0 ); cusolverSpHandle_t handle; cusolverStatus_t status; cusparseStatus_t cusparse_status; cusparseMatDescr_t mat_descr; status = cusolverSpCreate( &handle ); if( status != CUSOLVER_STATUS_SUCCESS ) { std::cerr << "cusolverSpCreate failed: " << status << std::endl; return false; } cusparse_status = cusparseCreateMatDescr( &mat_descr ); if( cusparse_status != CUSPARSE_STATUS_SUCCESS ) { std::cerr << "cusparseCreateMatDescr failed: " << cusparse_status << std::endl; return false; } cusparseSetMatType( mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseSetMatIndexBase( mat_descr, CUSPARSE_INDEX_BASE_ZERO ); const RealType tol = 1e-16; const int reorder = 0; int singularity = 0; if( std::is_same< typename Matrix::RealType, float >::value ) { status = cusolverSpScsrlsvqr( handle, size, this->matrix->getValues().getSize(), mat_descr, (const float*) this->matrix->getValues().getData(), this->matrix->getRowPointers().getData(), this->matrix->getColumnIndexes().getData(), (const float*) b.getData(), tol, reorder, (float*) x.getData(), &singularity ); if( status != CUSOLVER_STATUS_SUCCESS ) { std::cerr << "cusolverSpScsrlsvqr failed: " << status << std::endl; return false; } } if( std::is_same< typename Matrix::RealType, double >::value ) { status = cusolverSpDcsrlsvqr( handle, size, this->matrix->getValues().getSize(), mat_descr, (const double*) this->matrix->getValues().getData(), this->matrix->getRowPointers().getData(), this->matrix->getColumnIndexes().getData(), (const double*) b.getData(), tol, reorder, (double*) x.getData(), &singularity ); if( status != CUSOLVER_STATUS_SUCCESS ) { std::cerr << "cusolverSpDcsrlsvqr failed: " << status << std::endl; return false; } } cusparseDestroyMatDescr( mat_descr ); cusolverSpDestroy( handle ); return true; } }; } // namespace Linear } // namespace Solvers } // namespace TNL #endif src/Benchmarks/LinearSolvers/benchmarks.h 0 → 100644 +191 −0 Original line number Diff line number Diff line #pragma once #include <TNL/Pointers/SharedPointer.h> #include <TNL/Config/ParameterContainer.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/DistributedContainers/DistributedMatrix.h> #include "../Benchmarks.h" #ifdef HAVE_ARMADILLO #include <armadillo> #include <TNL/Matrices/CSR.h> #endif #include <stdexcept> // std::runtime_error using namespace TNL; using namespace TNL::Pointers; using namespace TNL::Benchmarks; template< typename Device > const char* getPerformer() { if( std::is_same< Device, Devices::Cuda >::value ) return "GPU"; return "CPU"; } template< typename Matrix > void barrier( const Matrix& matrix ) { } template< typename Matrix, typename Communicator > void barrier( const DistributedContainers::DistributedMatrix< Matrix, Communicator >& matrix ) { Communicator::Barrier( matrix.getCommunicationGroup() ); } template< template<typename> class Preconditioner, typename Matrix > void benchmarkPreconditionerUpdate( Benchmark& benchmark, const Config::ParameterContainer& parameters, const SharedPointer< Matrix >& matrix ) { barrier( matrix ); const char* performer = getPerformer< typename Matrix::DeviceType >(); Preconditioner< Matrix > preconditioner; preconditioner.setup( parameters ); auto reset = []() {}; auto compute = [&]() { preconditioner.update( matrix ); barrier( matrix ); }; benchmark.time( reset, performer, compute ); } template< template<typename> class Solver, template<typename> class Preconditioner, typename Matrix, typename Vector > void benchmarkSolver( Benchmark& benchmark, const Config::ParameterContainer& parameters, const SharedPointer< Matrix >& matrix, const Vector& x0, const Vector& b ) { barrier( matrix ); const char* performer = getPerformer< typename Matrix::DeviceType >(); // setup Solver< Matrix > solver; solver.setup( parameters ); solver.setMatrix( matrix ); // FIXME: getMonitor returns solver monitor specialized for double and int solver.setSolverMonitor( benchmark.getMonitor() ); auto pre = std::make_shared< Preconditioner< Matrix > >(); pre->setup( parameters ); solver.setPreconditioner( pre ); // preconditioner update may throw if it's not implemented for CUDA try { pre->update( matrix ); } catch ( const std::runtime_error& ) {} Vector x; x.setLike( x0 ); // reset function auto reset = [&]() { x = x0; }; // benchmark function auto compute = [&]() { const bool converged = solver.solve( b, x ); barrier( matrix ); if( ! converged ) throw std::runtime_error("solver did not converge"); }; benchmark.time( reset, performer, compute ); // TODO: add extra columns to the benchmark (iterations, preconditioned residue, true residue) // Vector r; // r.setLike( x ); // matrix->vectorProduct( x, r ); // r.addVector( b, 1.0, -1.0 ); // const double trueResidue = r.lpNorm( 2.0 ) / b.lpNorm( 2.0 ); // std::cout << "Converged: " << solver.checkConvergence() << ", iterations = " << solver.getIterations() << ", residue = " << solver.getResidue() << " " << std::endl; // std::cout << "Mean time: " << time / loops << " seconds." << std::endl; // std::cout << outputPrefix // << "converged: " << std::setw( 5) << std::boolalpha << ( ! std::isnan(solver.getResidue()) && solver.getResidue() < solver.getConvergenceResidue() ) << " " // << "iterations = " << std::setw( 4) << solver.getIterations() << " " // << "preconditioned residue = " << std::setw(10) << solver.getResidue() << " " // << "true residue = " << std::setw(10) << trueResidue << " " // << "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl; } #ifdef HAVE_ARMADILLO // TODO: make a TNL solver like UmfpackWrapper template< typename Vector > void benchmarkArmadillo( const Config::ParameterContainer& parameters, const Pointers::SharedPointer< Matrices::CSR< double, Devices::Host, int > >& matrix, const Vector& x0, const Vector& b ) { using namespace TNL; // copy matrix into Armadillo's class // sp_mat is in CSC format arma::uvec _colptr( matrix->getRowPointers().getSize() ); for( int i = 0; i < matrix->getRowPointers().getSize(); i++ ) _colptr[ i ] = matrix->getRowPointers()[ i ]; arma::uvec _rowind( matrix->getColumnIndexes().getSize() ); for( int i = 0; i < matrix->getColumnIndexes().getSize(); i++ ) _rowind[ i ] = matrix->getColumnIndexes()[ i ]; arma::vec _values( matrix->getValues().getData(), matrix->getValues().getSize() ); arma::sp_mat AT( _rowind, _colptr, _values, matrix->getColumns(), matrix->getRows() ); arma::sp_mat A = AT.t(); Vector x; x.setLike( x0 ); // Armadillo vector using the same memory as x (with copy_aux_mem=false, strict=true) arma::vec arma_x( x.getData(), x.getSize(), false, true ); arma::vec arma_b( b.getData(), b.getSize() ); arma::superlu_opts settings; // settings.equilibrate = false; settings.equilibrate = true; settings.pivot_thresh = 1.0; // settings.permutation = arma::superlu_opts::COLAMD; settings.permutation = arma::superlu_opts::MMD_AT_PLUS_A; // settings.refine = arma::superlu_opts::REF_DOUBLE; settings.refine = arma::superlu_opts::REF_NONE; // reset function auto reset = [&]() { x = x0; }; // benchmark function auto compute = [&]() { const bool converged = arma::spsolve( arma_x, A, arma_b, "superlu", settings ); if( ! converged ) throw std::runtime_error("solver did not converge"); }; const int loops = parameters.getParameter< int >( "loops" ); double time = timeFunction( compute, reset, loops ); arma::vec r = A * arma_x - arma_b; // std::cout << "Converged: " << (time > 0) << ", residue = " << arma::norm( r ) / arma::norm( arma_b ) << " " << std::endl; // std::cout << "Mean time: " << time / loops << " seconds." << std::endl; std::cout << "Converged: " << std::setw( 5) << std::boolalpha << (time > 0) << " " << "iterations = " << std::setw( 4) << "N/A" << " " << "residue = " << std::setw(10) << arma::norm( r ) / arma::norm( arma_b ) << " " << "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl; } #endif src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp +0 −10 Original line number Diff line number Diff line /*************************************************************************** tnl-benchmark-linear-solvers.cpp - description ------------------- begin : Jun 22, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #include "tnl-benchmark-linear-solvers.h" Loading
src/Benchmarks/Benchmarks.h +21 −8 Original line number Diff line number Diff line Loading @@ -16,6 +16,8 @@ #include <iomanip> #include <map> #include <vector> #include <exception> #include <limits> #include <TNL/Timer.h> #include <TNL/String.h> Loading Loading @@ -310,7 +312,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); } // Marks the start of a new benchmark (with custom metadata) Loading @@ -320,7 +321,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); // add loops to metadata metadata["loops"] = String(loops); writeMetadata( metadata ); Loading @@ -347,6 +347,7 @@ public: const double datasetSize = 0.0, // in GB const double baseTime = 0.0 ) { monitor.setStage( operation.getString() ); if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) { metadataColumns[ 0 ].second = operation; } Loading Loading @@ -401,6 +402,7 @@ public: ComputeFunction & compute ) { double time; try { if( verbose ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); Loading @@ -409,6 +411,11 @@ public: else { time = timeFunction( compute, reset, loops, monitor ); } } catch ( const std::exception& e ) { std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl; time = std::numeric_limits<double>::quiet_NaN(); } const double bandwidth = datasetSize / time; const double speedup = this->baseTime / time; Loading Loading @@ -450,6 +457,12 @@ public: using Logging::save; Solvers::IterativeSolverMonitor< double, int >& getMonitor() { return monitor; } protected: int loops; double datasetSize = 0.0; Loading
src/Benchmarks/LinearSolvers/CMakeLists.txt +8 −5 Original line number Diff line number Diff line if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl ) else() CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers-cuda tnl-benchmark-linear-solvers.cu OPTIONS -DHAVE_CUSPARSE ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers-cuda tnl ${CUDA_cusparse_LIBRARY} ) install( TARGETS tnl-benchmark-linear-solvers-cuda RUNTIME DESTINATION bin ) endif() ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp ) TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl ) endif() install( TARGETS tnl-benchmark-linear-solvers RUNTIME DESTINATION bin )
src/Benchmarks/LinearSolvers/CuSolverWrapper.h 0 → 100644 +135 −0 Original line number Diff line number Diff line #pragma once #ifdef HAVE_CUDA #include <cusolverSp.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Matrices/CSR.h> #include <TNL/Solvers/Linear/LinearSolver.h> template< typename Matrix > struct is_csr_matrix { static constexpr bool value = false; }; template< typename Real, typename Device, typename Index > struct is_csr_matrix< TNL::Matrices::CSR< Real, Device, Index > > { static constexpr bool value = true; }; namespace TNL { namespace Solvers { namespace Linear { template< typename Matrix > class CuSolverWrapper : public LinearSolver< Matrix > { static_assert( ::is_csr_matrix< Matrix >::value, "The CuSolverWrapper solver is available only for CSR matrices." ); static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "CuSolverWrapper is available only on CUDA" ); static_assert( std::is_same< typename Matrix::RealType, float >::value || std::is_same< typename Matrix::RealType, double >::value, "unsupported RealType" ); static_assert( std::is_same< typename Matrix::IndexType, int >::value, "unsupported IndexType" ); using Base = LinearSolver< Matrices::CSR< double, Devices::Cuda, int > >; public: using RealType = typename Base::RealType; using DeviceType = typename Base::DeviceType; using IndexType = typename Base::IndexType; using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; bool solve( ConstVectorViewType b, VectorViewType x ) override { TNL_ASSERT_EQ( this->matrix->getRows(), this->matrix->getColumns(), "matrix must be square" ); TNL_ASSERT_EQ( this->matrix->getColumns(), x.getSize(), "wrong size of the solution vector" ); TNL_ASSERT_EQ( this->matrix->getColumns(), b.getSize(), "wrong size of the right hand side" ); const IndexType size = this->matrix->getRows(); this->resetIterations(); this->setResidue( this->getConvergenceResidue() + 1.0 ); cusolverSpHandle_t handle; cusolverStatus_t status; cusparseStatus_t cusparse_status; cusparseMatDescr_t mat_descr; status = cusolverSpCreate( &handle ); if( status != CUSOLVER_STATUS_SUCCESS ) { std::cerr << "cusolverSpCreate failed: " << status << std::endl; return false; } cusparse_status = cusparseCreateMatDescr( &mat_descr ); if( cusparse_status != CUSPARSE_STATUS_SUCCESS ) { std::cerr << "cusparseCreateMatDescr failed: " << cusparse_status << std::endl; return false; } cusparseSetMatType( mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseSetMatIndexBase( mat_descr, CUSPARSE_INDEX_BASE_ZERO ); const RealType tol = 1e-16; const int reorder = 0; int singularity = 0; if( std::is_same< typename Matrix::RealType, float >::value ) { status = cusolverSpScsrlsvqr( handle, size, this->matrix->getValues().getSize(), mat_descr, (const float*) this->matrix->getValues().getData(), this->matrix->getRowPointers().getData(), this->matrix->getColumnIndexes().getData(), (const float*) b.getData(), tol, reorder, (float*) x.getData(), &singularity ); if( status != CUSOLVER_STATUS_SUCCESS ) { std::cerr << "cusolverSpScsrlsvqr failed: " << status << std::endl; return false; } } if( std::is_same< typename Matrix::RealType, double >::value ) { status = cusolverSpDcsrlsvqr( handle, size, this->matrix->getValues().getSize(), mat_descr, (const double*) this->matrix->getValues().getData(), this->matrix->getRowPointers().getData(), this->matrix->getColumnIndexes().getData(), (const double*) b.getData(), tol, reorder, (double*) x.getData(), &singularity ); if( status != CUSOLVER_STATUS_SUCCESS ) { std::cerr << "cusolverSpDcsrlsvqr failed: " << status << std::endl; return false; } } cusparseDestroyMatDescr( mat_descr ); cusolverSpDestroy( handle ); return true; } }; } // namespace Linear } // namespace Solvers } // namespace TNL #endif
src/Benchmarks/LinearSolvers/benchmarks.h 0 → 100644 +191 −0 Original line number Diff line number Diff line #pragma once #include <TNL/Pointers/SharedPointer.h> #include <TNL/Config/ParameterContainer.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/DistributedContainers/DistributedMatrix.h> #include "../Benchmarks.h" #ifdef HAVE_ARMADILLO #include <armadillo> #include <TNL/Matrices/CSR.h> #endif #include <stdexcept> // std::runtime_error using namespace TNL; using namespace TNL::Pointers; using namespace TNL::Benchmarks; template< typename Device > const char* getPerformer() { if( std::is_same< Device, Devices::Cuda >::value ) return "GPU"; return "CPU"; } template< typename Matrix > void barrier( const Matrix& matrix ) { } template< typename Matrix, typename Communicator > void barrier( const DistributedContainers::DistributedMatrix< Matrix, Communicator >& matrix ) { Communicator::Barrier( matrix.getCommunicationGroup() ); } template< template<typename> class Preconditioner, typename Matrix > void benchmarkPreconditionerUpdate( Benchmark& benchmark, const Config::ParameterContainer& parameters, const SharedPointer< Matrix >& matrix ) { barrier( matrix ); const char* performer = getPerformer< typename Matrix::DeviceType >(); Preconditioner< Matrix > preconditioner; preconditioner.setup( parameters ); auto reset = []() {}; auto compute = [&]() { preconditioner.update( matrix ); barrier( matrix ); }; benchmark.time( reset, performer, compute ); } template< template<typename> class Solver, template<typename> class Preconditioner, typename Matrix, typename Vector > void benchmarkSolver( Benchmark& benchmark, const Config::ParameterContainer& parameters, const SharedPointer< Matrix >& matrix, const Vector& x0, const Vector& b ) { barrier( matrix ); const char* performer = getPerformer< typename Matrix::DeviceType >(); // setup Solver< Matrix > solver; solver.setup( parameters ); solver.setMatrix( matrix ); // FIXME: getMonitor returns solver monitor specialized for double and int solver.setSolverMonitor( benchmark.getMonitor() ); auto pre = std::make_shared< Preconditioner< Matrix > >(); pre->setup( parameters ); solver.setPreconditioner( pre ); // preconditioner update may throw if it's not implemented for CUDA try { pre->update( matrix ); } catch ( const std::runtime_error& ) {} Vector x; x.setLike( x0 ); // reset function auto reset = [&]() { x = x0; }; // benchmark function auto compute = [&]() { const bool converged = solver.solve( b, x ); barrier( matrix ); if( ! converged ) throw std::runtime_error("solver did not converge"); }; benchmark.time( reset, performer, compute ); // TODO: add extra columns to the benchmark (iterations, preconditioned residue, true residue) // Vector r; // r.setLike( x ); // matrix->vectorProduct( x, r ); // r.addVector( b, 1.0, -1.0 ); // const double trueResidue = r.lpNorm( 2.0 ) / b.lpNorm( 2.0 ); // std::cout << "Converged: " << solver.checkConvergence() << ", iterations = " << solver.getIterations() << ", residue = " << solver.getResidue() << " " << std::endl; // std::cout << "Mean time: " << time / loops << " seconds." << std::endl; // std::cout << outputPrefix // << "converged: " << std::setw( 5) << std::boolalpha << ( ! std::isnan(solver.getResidue()) && solver.getResidue() < solver.getConvergenceResidue() ) << " " // << "iterations = " << std::setw( 4) << solver.getIterations() << " " // << "preconditioned residue = " << std::setw(10) << solver.getResidue() << " " // << "true residue = " << std::setw(10) << trueResidue << " " // << "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl; } #ifdef HAVE_ARMADILLO // TODO: make a TNL solver like UmfpackWrapper template< typename Vector > void benchmarkArmadillo( const Config::ParameterContainer& parameters, const Pointers::SharedPointer< Matrices::CSR< double, Devices::Host, int > >& matrix, const Vector& x0, const Vector& b ) { using namespace TNL; // copy matrix into Armadillo's class // sp_mat is in CSC format arma::uvec _colptr( matrix->getRowPointers().getSize() ); for( int i = 0; i < matrix->getRowPointers().getSize(); i++ ) _colptr[ i ] = matrix->getRowPointers()[ i ]; arma::uvec _rowind( matrix->getColumnIndexes().getSize() ); for( int i = 0; i < matrix->getColumnIndexes().getSize(); i++ ) _rowind[ i ] = matrix->getColumnIndexes()[ i ]; arma::vec _values( matrix->getValues().getData(), matrix->getValues().getSize() ); arma::sp_mat AT( _rowind, _colptr, _values, matrix->getColumns(), matrix->getRows() ); arma::sp_mat A = AT.t(); Vector x; x.setLike( x0 ); // Armadillo vector using the same memory as x (with copy_aux_mem=false, strict=true) arma::vec arma_x( x.getData(), x.getSize(), false, true ); arma::vec arma_b( b.getData(), b.getSize() ); arma::superlu_opts settings; // settings.equilibrate = false; settings.equilibrate = true; settings.pivot_thresh = 1.0; // settings.permutation = arma::superlu_opts::COLAMD; settings.permutation = arma::superlu_opts::MMD_AT_PLUS_A; // settings.refine = arma::superlu_opts::REF_DOUBLE; settings.refine = arma::superlu_opts::REF_NONE; // reset function auto reset = [&]() { x = x0; }; // benchmark function auto compute = [&]() { const bool converged = arma::spsolve( arma_x, A, arma_b, "superlu", settings ); if( ! converged ) throw std::runtime_error("solver did not converge"); }; const int loops = parameters.getParameter< int >( "loops" ); double time = timeFunction( compute, reset, loops ); arma::vec r = A * arma_x - arma_b; // std::cout << "Converged: " << (time > 0) << ", residue = " << arma::norm( r ) / arma::norm( arma_b ) << " " << std::endl; // std::cout << "Mean time: " << time / loops << " seconds." << std::endl; std::cout << "Converged: " << std::setw( 5) << std::boolalpha << (time > 0) << " " << "iterations = " << std::setw( 4) << "N/A" << " " << "residue = " << std::setw(10) << arma::norm( r ) / arma::norm( arma_b ) << " " << "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl; } #endif
src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp +0 −10 Original line number Diff line number Diff line /*************************************************************************** tnl-benchmark-linear-solvers.cpp - description ------------------- begin : Jun 22, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #include "tnl-benchmark-linear-solvers.h"