Skip to content
Snippets Groups Projects

Distributed linear solvers

Merged Jakub Klinkovský requested to merge cineca/mpi into develop
1 file
+ 2
0
Compare changes
  • Side-by-side
  • Inline
+ 215
0
#pragma once
#include <TNL/Pointers/SharedPointer.h>
#include <TNL/Config/ParameterContainer.h>
#include <TNL/Solvers/IterativeSolverMonitor.h>
#include <TNL/DistributedContainers/DistributedMatrix.h>
#include "../Benchmarks.h"
#ifdef HAVE_ARMADILLO
#include <armadillo>
#include <TNL/Matrices/CSR.h>
#endif
#include <stdexcept> // std::runtime_error
using namespace TNL;
using namespace TNL::Pointers;
using namespace TNL::Benchmarks;
template< typename Device >
const char*
getPerformer()
{
if( std::is_same< Device, Devices::Cuda >::value )
return "GPU";
return "CPU";
}
template< typename Matrix >
void barrier( const Matrix& matrix )
{
}
template< typename Matrix, typename Communicator >
void barrier( const DistributedContainers::DistributedMatrix< Matrix, Communicator >& matrix )
{
Communicator::Barrier( matrix.getCommunicationGroup() );
}
template< template<typename> class Preconditioner, typename Matrix >
void
benchmarkPreconditionerUpdate( Benchmark& benchmark,
const Config::ParameterContainer& parameters,
const SharedPointer< Matrix >& matrix )
{
barrier( matrix );
const char* performer = getPerformer< typename Matrix::DeviceType >();
Preconditioner< Matrix > preconditioner;
preconditioner.setup( parameters );
auto reset = []() {};
auto compute = [&]() {
preconditioner.update( matrix );
barrier( matrix );
};
benchmark.time( reset, performer, compute );
}
template< template<typename> class Solver, template<typename> class Preconditioner, typename Matrix, typename Vector >
void
benchmarkSolver( Benchmark& benchmark,
const Config::ParameterContainer& parameters,
const SharedPointer< Matrix >& matrix,
const Vector& x0,
const Vector& b )
{
barrier( matrix );
const char* performer = getPerformer< typename Matrix::DeviceType >();
// setup
Solver< Matrix > solver;
solver.setup( parameters );
solver.setMatrix( matrix );
// FIXME: getMonitor returns solver monitor specialized for double and int
solver.setSolverMonitor( benchmark.getMonitor() );
auto pre = std::make_shared< Preconditioner< Matrix > >();
pre->setup( parameters );
solver.setPreconditioner( pre );
// preconditioner update may throw if it's not implemented for CUDA
try {
pre->update( matrix );
}
catch ( const std::runtime_error& ) {}
Vector x;
x.setLike( x0 );
// reset function
auto reset = [&]() {
x = x0;
};
// benchmark function
auto compute = [&]() {
const bool converged = solver.solve( b, x );
barrier( matrix );
if( ! converged )
throw std::runtime_error("solver did not converge");
};
// subclass BenchmarkResult to add extra columns to the benchmark
// (iterations, preconditioned residue, true residue)
struct MyBenchmarkResult : public BenchmarkResult
{
using HeaderElements = BenchmarkResult::HeaderElements;
using RowElements = BenchmarkResult::RowElements;
Solver< Matrix >& solver;
const SharedPointer< Matrix >& matrix;
const Vector& x;
const Vector& b;
MyBenchmarkResult( Solver< Matrix >& solver,
const SharedPointer< Matrix >& matrix,
const Vector& x,
const Vector& b )
: solver(solver), matrix(matrix), x(x), b(b)
{}
virtual HeaderElements getTableHeader() const override
{
return HeaderElements({"time", "speedup", "converged", "iterations", "residue_precond", "residue_true"});
}
virtual RowElements getRowElements() const override
{
const bool converged = ! std::isnan(solver.getResidue()) && solver.getResidue() < solver.getConvergenceResidue();
const long iterations = solver.getIterations();
const double residue_precond = solver.getResidue();
Vector r;
r.setLike( x );
matrix->vectorProduct( x, r );
r.addVector( b, 1.0, -1.0 );
const double residue_true = r.lpNorm( 2.0 ) / b.lpNorm( 2.0 );
return RowElements({ time, speedup, (double) converged, (double) iterations,
residue_precond, residue_true });
}
};
MyBenchmarkResult benchmarkResult( solver, matrix, x, b );
benchmark.time( reset, performer, compute, benchmarkResult );
}
#ifdef HAVE_ARMADILLO
// TODO: make a TNL solver like UmfpackWrapper
template< typename Vector >
void
benchmarkArmadillo( const Config::ParameterContainer& parameters,
const Pointers::SharedPointer< Matrices::CSR< double, Devices::Host, int > >& matrix,
const Vector& x0,
const Vector& b )
{
// copy matrix into Armadillo's class
// sp_mat is in CSC format
arma::uvec _colptr( matrix->getRowPointers().getSize() );
for( int i = 0; i < matrix->getRowPointers().getSize(); i++ )
_colptr[ i ] = matrix->getRowPointers()[ i ];
arma::uvec _rowind( matrix->getColumnIndexes().getSize() );
for( int i = 0; i < matrix->getColumnIndexes().getSize(); i++ )
_rowind[ i ] = matrix->getColumnIndexes()[ i ];
arma::vec _values( matrix->getValues().getData(), matrix->getValues().getSize() );
arma::sp_mat AT( _rowind,
_colptr,
_values,
matrix->getColumns(),
matrix->getRows() );
arma::sp_mat A = AT.t();
Vector x;
x.setLike( x0 );
// Armadillo vector using the same memory as x (with copy_aux_mem=false, strict=true)
arma::vec arma_x( x.getData(), x.getSize(), false, true );
arma::vec arma_b( b.getData(), b.getSize() );
arma::superlu_opts settings;
// settings.equilibrate = false;
settings.equilibrate = true;
settings.pivot_thresh = 1.0;
// settings.permutation = arma::superlu_opts::COLAMD;
settings.permutation = arma::superlu_opts::MMD_AT_PLUS_A;
// settings.refine = arma::superlu_opts::REF_DOUBLE;
settings.refine = arma::superlu_opts::REF_NONE;
// reset function
auto reset = [&]() {
x = x0;
};
// benchmark function
auto compute = [&]() {
const bool converged = arma::spsolve( arma_x, A, arma_b, "superlu", settings );
if( ! converged )
throw std::runtime_error("solver did not converge");
};
const int loops = parameters.getParameter< int >( "loops" );
double time = timeFunction( compute, reset, loops );
arma::vec r = A * arma_x - arma_b;
// std::cout << "Converged: " << (time > 0) << ", residue = " << arma::norm( r ) / arma::norm( arma_b ) << " " << std::endl;
// std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
std::cout << "Converged: " << std::setw( 5) << std::boolalpha << (time > 0) << " "
<< "iterations = " << std::setw( 4) << "N/A" << " "
<< "residue = " << std::setw(10) << arma::norm( r ) / arma::norm( arma_b ) << " "
<< "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl;
}
#endif
Loading