Skip to content
Snippets Groups Projects
Commit 616f0cbe authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Rewritten tnl-benchmark-linear-solvers

parent 51d16ef0
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,8 @@
#include <iomanip>
#include <map>
#include <vector>
#include <exception>
#include <limits>
#include <TNL/Timer.h>
#include <TNL/String.h>
......@@ -310,7 +312,6 @@ public:
{
closeTable();
writeTitle( title );
monitor.setStage( title.getString() );
}
// Marks the start of a new benchmark (with custom metadata)
......@@ -320,7 +321,6 @@ public:
{
closeTable();
writeTitle( title );
monitor.setStage( title.getString() );
// add loops to metadata
metadata["loops"] = String(loops);
writeMetadata( metadata );
......@@ -347,6 +347,7 @@ public:
const double datasetSize = 0.0, // in GB
const double baseTime = 0.0 )
{
monitor.setStage( operation.getString() );
if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) {
metadataColumns[ 0 ].second = operation;
}
......@@ -401,13 +402,19 @@ public:
ComputeFunction & compute )
{
double time;
if( verbose ) {
// run the monitor main loop
Solvers::SolverMonitorThread monitor_thread( monitor );
time = timeFunction( compute, reset, loops, monitor );
try {
if( verbose ) {
// run the monitor main loop
Solvers::SolverMonitorThread monitor_thread( monitor );
time = timeFunction( compute, reset, loops, monitor );
}
else {
time = timeFunction( compute, reset, loops, monitor );
}
}
else {
time = timeFunction( compute, reset, loops, monitor );
catch ( const std::exception& e ) {
std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl;
time = std::numeric_limits<double>::quiet_NaN();
}
const double bandwidth = datasetSize / time;
......@@ -450,6 +457,12 @@ public:
using Logging::save;
Solvers::IterativeSolverMonitor< double, int >&
getMonitor()
{
return monitor;
}
protected:
int loops;
double datasetSize = 0.0;
......
if( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu )
TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
else()
ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers-cuda tnl-benchmark-linear-solvers.cu
OPTIONS -DHAVE_CUSPARSE )
TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers-cuda tnl ${CUDA_cusparse_LIBRARY} )
install( TARGETS tnl-benchmark-linear-solvers-cuda RUNTIME DESTINATION bin )
endif()
ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
install( TARGETS tnl-benchmark-linear-solvers RUNTIME DESTINATION bin )
#pragma once
#ifdef HAVE_CUDA
#include <cusolverSp.h>
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Matrices/CSR.h>
#include <TNL/Solvers/Linear/LinearSolver.h>
template< typename Matrix >
struct is_csr_matrix
{
static constexpr bool value = false;
};
template< typename Real, typename Device, typename Index >
struct is_csr_matrix< TNL::Matrices::CSR< Real, Device, Index > >
{
static constexpr bool value = true;
};
namespace TNL {
namespace Solvers {
namespace Linear {
template< typename Matrix >
class CuSolverWrapper
: public LinearSolver< Matrix >
{
static_assert( ::is_csr_matrix< Matrix >::value, "The CuSolverWrapper solver is available only for CSR matrices." );
static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "CuSolverWrapper is available only on CUDA" );
static_assert( std::is_same< typename Matrix::RealType, float >::value ||
std::is_same< typename Matrix::RealType, double >::value, "unsupported RealType" );
static_assert( std::is_same< typename Matrix::IndexType, int >::value, "unsupported IndexType" );
using Base = LinearSolver< Matrices::CSR< double, Devices::Cuda, int > >;
public:
using RealType = typename Base::RealType;
using DeviceType = typename Base::DeviceType;
using IndexType = typename Base::IndexType;
using VectorViewType = typename Base::VectorViewType;
using ConstVectorViewType = typename Base::ConstVectorViewType;
bool solve( ConstVectorViewType b, VectorViewType x ) override
{
TNL_ASSERT_EQ( this->matrix->getRows(), this->matrix->getColumns(), "matrix must be square" );
TNL_ASSERT_EQ( this->matrix->getColumns(), x.getSize(), "wrong size of the solution vector" );
TNL_ASSERT_EQ( this->matrix->getColumns(), b.getSize(), "wrong size of the right hand side" );
const IndexType size = this->matrix->getRows();
this->resetIterations();
this->setResidue( this->getConvergenceResidue() + 1.0 );
cusolverSpHandle_t handle;
cusolverStatus_t status;
cusparseStatus_t cusparse_status;
cusparseMatDescr_t mat_descr;
status = cusolverSpCreate( &handle );
if( status != CUSOLVER_STATUS_SUCCESS ) {
std::cerr << "cusolverSpCreate failed: " << status << std::endl;
return false;
}
cusparse_status = cusparseCreateMatDescr( &mat_descr );
if( cusparse_status != CUSPARSE_STATUS_SUCCESS ) {
std::cerr << "cusparseCreateMatDescr failed: " << cusparse_status << std::endl;
return false;
}
cusparseSetMatType( mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
cusparseSetMatIndexBase( mat_descr, CUSPARSE_INDEX_BASE_ZERO );
const RealType tol = 1e-16;
const int reorder = 0;
int singularity = 0;
if( std::is_same< typename Matrix::RealType, float >::value )
{
status = cusolverSpScsrlsvqr( handle,
size,
this->matrix->getValues().getSize(),
mat_descr,
(const float*) this->matrix->getValues().getData(),
this->matrix->getRowPointers().getData(),
this->matrix->getColumnIndexes().getData(),
(const float*) b.getData(),
tol,
reorder,
(float*) x.getData(),
&singularity );
if( status != CUSOLVER_STATUS_SUCCESS ) {
std::cerr << "cusolverSpScsrlsvqr failed: " << status << std::endl;
return false;
}
}
if( std::is_same< typename Matrix::RealType, double >::value )
{
status = cusolverSpDcsrlsvqr( handle,
size,
this->matrix->getValues().getSize(),
mat_descr,
(const double*) this->matrix->getValues().getData(),
this->matrix->getRowPointers().getData(),
this->matrix->getColumnIndexes().getData(),
(const double*) b.getData(),
tol,
reorder,
(double*) x.getData(),
&singularity );
if( status != CUSOLVER_STATUS_SUCCESS ) {
std::cerr << "cusolverSpDcsrlsvqr failed: " << status << std::endl;
return false;
}
}
cusparseDestroyMatDescr( mat_descr );
cusolverSpDestroy( handle );
return true;
}
};
} // namespace Linear
} // namespace Solvers
} // namespace TNL
#endif
#pragma once
#include <TNL/Pointers/SharedPointer.h>
#include <TNL/Config/ParameterContainer.h>
#include <TNL/Solvers/IterativeSolverMonitor.h>
#include <TNL/DistributedContainers/DistributedMatrix.h>
#include "../Benchmarks.h"
#ifdef HAVE_ARMADILLO
#include <armadillo>
#include <TNL/Matrices/CSR.h>
#endif
#include <stdexcept> // std::runtime_error
using namespace TNL;
using namespace TNL::Pointers;
using namespace TNL::Benchmarks;
template< typename Device >
const char*
getPerformer()
{
if( std::is_same< Device, Devices::Cuda >::value )
return "GPU";
return "CPU";
}
template< typename Matrix >
void barrier( const Matrix& matrix )
{
}
template< typename Matrix, typename Communicator >
void barrier( const DistributedContainers::DistributedMatrix< Matrix, Communicator >& matrix )
{
Communicator::Barrier( matrix.getCommunicationGroup() );
}
template< template<typename> class Preconditioner, typename Matrix >
void
benchmarkPreconditionerUpdate( Benchmark& benchmark,
const Config::ParameterContainer& parameters,
const SharedPointer< Matrix >& matrix )
{
barrier( matrix );
const char* performer = getPerformer< typename Matrix::DeviceType >();
Preconditioner< Matrix > preconditioner;
auto reset = []() {};
auto compute = [&]() {
preconditioner.update( matrix );
barrier( matrix );
};
benchmark.time( reset, performer, compute );
}
template< template<typename> class Solver, template<typename> class Preconditioner, typename Matrix, typename Vector >
void
benchmarkSolver( Benchmark& benchmark,
const Config::ParameterContainer& parameters,
const SharedPointer< Matrix >& matrix,
const Vector& x0,
const Vector& b )
{
barrier( matrix );
const char* performer = getPerformer< typename Matrix::DeviceType >();
// setup
Solver< Matrix > solver;
solver.setup( parameters );
solver.setMatrix( matrix );
// FIXME: getMonitor returns solver monitor specialized for double and int
solver.setSolverMonitor( benchmark.getMonitor() );
auto pre = std::make_shared< Preconditioner< Matrix > >();
solver.setPreconditioner( pre );
// preconditioner update may throw if it's not implemented for CUDA
try {
pre->update( matrix );
}
catch ( const std::runtime_error& ) {}
Vector x;
x.setLike( x0 );
// reset function
auto reset = [&]() {
x = x0;
};
// benchmark function
auto compute = [&]() {
const bool converged = solver.solve( b, x );
barrier( matrix );
if( ! converged )
throw std::runtime_error("solver did not converge");
};
benchmark.time( reset, performer, compute );
// TODO: add extra columns to the benchmark (iterations, preconditioned residue, true residue)
// Vector r;
// r.setLike( x );
// matrix->vectorProduct( x, r );
// r.addVector( b, 1.0, -1.0 );
// const double trueResidue = r.lpNorm( 2.0 ) / b.lpNorm( 2.0 );
// std::cout << "Converged: " << solver.checkConvergence() << ", iterations = " << solver.getIterations() << ", residue = " << solver.getResidue() << " " << std::endl;
// std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
// std::cout << outputPrefix
// << "converged: " << std::setw( 5) << std::boolalpha << ( ! std::isnan(solver.getResidue()) && solver.getResidue() < solver.getConvergenceResidue() ) << " "
// << "iterations = " << std::setw( 4) << solver.getIterations() << " "
// << "preconditioned residue = " << std::setw(10) << solver.getResidue() << " "
// << "true residue = " << std::setw(10) << trueResidue << " "
// << "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl;
}
#ifdef HAVE_ARMADILLO
// TODO: make a TNL solver like UmfpackWrapper
template< typename Vector >
void
benchmarkArmadillo( const Config::ParameterContainer& parameters,
const Pointers::SharedPointer< Matrices::CSR< double, Devices::Host, int > >& matrix,
const Vector& x0,
const Vector& b )
{
using namespace TNL;
// copy matrix into Armadillo's class
// sp_mat is in CSC format
arma::uvec _colptr( matrix->getRowPointers().getSize() );
for( int i = 0; i < matrix->getRowPointers().getSize(); i++ )
_colptr[ i ] = matrix->getRowPointers()[ i ];
arma::uvec _rowind( matrix->getColumnIndexes().getSize() );
for( int i = 0; i < matrix->getColumnIndexes().getSize(); i++ )
_rowind[ i ] = matrix->getColumnIndexes()[ i ];
arma::vec _values( matrix->getValues().getData(), matrix->getValues().getSize() );
arma::sp_mat AT( _rowind,
_colptr,
_values,
matrix->getColumns(),
matrix->getRows() );
arma::sp_mat A = AT.t();
Vector x;
x.setLike( x0 );
// Armadillo vector using the same memory as x (with copy_aux_mem=false, strict=true)
arma::vec arma_x( x.getData(), x.getSize(), false, true );
arma::vec arma_b( b.getData(), b.getSize() );
arma::superlu_opts settings;
// settings.equilibrate = false;
settings.equilibrate = true;
settings.pivot_thresh = 1.0;
// settings.permutation = arma::superlu_opts::COLAMD;
settings.permutation = arma::superlu_opts::MMD_AT_PLUS_A;
// settings.refine = arma::superlu_opts::REF_DOUBLE;
settings.refine = arma::superlu_opts::REF_NONE;
// reset function
auto reset = [&]() {
x = x0;
};
// benchmark function
auto compute = [&]() {
const bool converged = arma::spsolve( arma_x, A, arma_b, "superlu", settings );
if( ! converged )
throw std::runtime_error("solver did not converge");
};
const int loops = parameters.getParameter< int >( "loops" );
double time = timeFunction( compute, reset, loops );
arma::vec r = A * arma_x - arma_b;
// std::cout << "Converged: " << (time > 0) << ", residue = " << arma::norm( r ) / arma::norm( arma_b ) << " " << std::endl;
// std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
std::cout << "Converged: " << std::setw( 5) << std::boolalpha << (time > 0) << " "
<< "iterations = " << std::setw( 4) << "N/A" << " "
<< "residue = " << std::setw(10) << arma::norm( r ) / arma::norm( arma_b ) << " "
<< "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl;
}
#endif
/***************************************************************************
tnl-benchmark-linear-solvers.cpp - description
-------------------
begin : Jun 22, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-linear-solvers.h"
/***************************************************************************
tnl-benchmark-linear-solvers.cu - description
-------------------
begin : Jun 22, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-linear-solvers.h"
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment