Rewritten tnl-benchmark-linear-solvers (449d7efe) · Commits · TNL / tnl-dev

src/Benchmarks/Benchmarks.h

+21 −8

Original line number	Diff line number	Diff line
		@@ -16,6 +16,8 @@
		#include <iomanip>
		#include <map>
		#include <vector>
		#include <exception>
		#include <limits>

		#include <TNL/Timer.h>
		#include <TNL/String.h>
		@@ -310,7 +312,6 @@ public:
		{
		closeTable();
		writeTitle( title );
		monitor.setStage( title.getString() );
		}

		// Marks the start of a new benchmark (with custom metadata)
		@@ -320,7 +321,6 @@ public:
		{
		closeTable();
		writeTitle( title );
		monitor.setStage( title.getString() );
		// add loops to metadata
		metadata["loops"] = String(loops);
		writeMetadata( metadata );
		@@ -347,6 +347,7 @@ public:
		const double datasetSize = 0.0, // in GB
		const double baseTime = 0.0 )
		{
		monitor.setStage( operation.getString() );
		if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) {
		metadataColumns[ 0 ].second = operation;
		}
		@@ -401,6 +402,7 @@ public:
		ComputeFunction & compute )
		{
		double time;
		try {
		if( verbose ) {
		// run the monitor main loop
		Solvers::SolverMonitorThread monitor_thread( monitor );
		@@ -409,6 +411,11 @@ public:
		else {
		time = timeFunction( compute, reset, loops, monitor );
		}
		}
		catch ( const std::exception& e ) {
		std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl;
		time = std::numeric_limits<double>::quiet_NaN();
		}

		const double bandwidth = datasetSize / time;
		const double speedup = this->baseTime / time;
		@@ -450,6 +457,12 @@ public:

		using Logging::save;

		Solvers::IterativeSolverMonitor< double, int >&
		getMonitor()
		{
		return monitor;
		}

		protected:
		int loops;
		double datasetSize = 0.0;

src/Benchmarks/LinearSolvers/CMakeLists.txt

+8 −5

Original line number	Diff line number	Diff line
		if( BUILD_CUDA )
		CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu )
		TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
		else()
		CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers-cuda tnl-benchmark-linear-solvers.cu
		OPTIONS -DHAVE_CUSPARSE )
		TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers-cuda tnl ${CUDA_cusparse_LIBRARY} )

		install( TARGETS tnl-benchmark-linear-solvers-cuda RUNTIME DESTINATION bin )
		endif()

		ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
		TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
		endif()

		install( TARGETS tnl-benchmark-linear-solvers RUNTIME DESTINATION bin )

src/Benchmarks/LinearSolvers/CuSolverWrapper.h

0 → 100644

+135 −0

Original line number	Diff line number	Diff line
		#pragma once

		#ifdef HAVE_CUDA

		#include <cusolverSp.h>

		#include <TNL/Config/ConfigDescription.h>
		#include <TNL/Matrices/CSR.h>
		#include <TNL/Solvers/Linear/LinearSolver.h>


		template< typename Matrix >
		struct is_csr_matrix
		{
		static constexpr bool value = false;
		};

		template< typename Real, typename Device, typename Index >
		struct is_csr_matrix< TNL::Matrices::CSR< Real, Device, Index > >
		{
		static constexpr bool value = true;
		};


		namespace TNL {
		namespace Solvers {
		namespace Linear {

		template< typename Matrix >
		class CuSolverWrapper
		: public LinearSolver< Matrix >
		{
		static_assert( ::is_csr_matrix< Matrix >::value, "The CuSolverWrapper solver is available only for CSR matrices." );
		static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "CuSolverWrapper is available only on CUDA" );
		static_assert( std::is_same< typename Matrix::RealType, float >::value \|\|
		std::is_same< typename Matrix::RealType, double >::value, "unsupported RealType" );
		static_assert( std::is_same< typename Matrix::IndexType, int >::value, "unsupported IndexType" );

		using Base = LinearSolver< Matrices::CSR< double, Devices::Cuda, int > >;
		public:
		using RealType = typename Base::RealType;
		using DeviceType = typename Base::DeviceType;
		using IndexType = typename Base::IndexType;
		using VectorViewType = typename Base::VectorViewType;
		using ConstVectorViewType = typename Base::ConstVectorViewType;

		bool solve( ConstVectorViewType b, VectorViewType x ) override
		{
		TNL_ASSERT_EQ( this->matrix->getRows(), this->matrix->getColumns(), "matrix must be square" );
		TNL_ASSERT_EQ( this->matrix->getColumns(), x.getSize(), "wrong size of the solution vector" );
		TNL_ASSERT_EQ( this->matrix->getColumns(), b.getSize(), "wrong size of the right hand side" );

		const IndexType size = this->matrix->getRows();

		this->resetIterations();
		this->setResidue( this->getConvergenceResidue() + 1.0 );

		cusolverSpHandle_t handle;
		cusolverStatus_t status;
		cusparseStatus_t cusparse_status;
		cusparseMatDescr_t mat_descr;

		status = cusolverSpCreate( &handle );
		if( status != CUSOLVER_STATUS_SUCCESS ) {
		std::cerr << "cusolverSpCreate failed: " << status << std::endl;
		return false;
		}

		cusparse_status = cusparseCreateMatDescr( &mat_descr );
		if( cusparse_status != CUSPARSE_STATUS_SUCCESS ) {
		std::cerr << "cusparseCreateMatDescr failed: " << cusparse_status << std::endl;
		return false;
		}

		cusparseSetMatType( mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
		cusparseSetMatIndexBase( mat_descr, CUSPARSE_INDEX_BASE_ZERO );

		const RealType tol = 1e-16;
		const int reorder = 0;
		int singularity = 0;

		if( std::is_same< typename Matrix::RealType, float >::value )
		{
		status = cusolverSpScsrlsvqr( handle,
		size,
		this->matrix->getValues().getSize(),
		mat_descr,
		(const float*) this->matrix->getValues().getData(),
		this->matrix->getRowPointers().getData(),
		this->matrix->getColumnIndexes().getData(),
		(const float*) b.getData(),
		tol,
		reorder,
		(float*) x.getData(),
		&singularity );

		if( status != CUSOLVER_STATUS_SUCCESS ) {
		std::cerr << "cusolverSpScsrlsvqr failed: " << status << std::endl;
		return false;
		}
		}

		if( std::is_same< typename Matrix::RealType, double >::value )
		{
		status = cusolverSpDcsrlsvqr( handle,
		size,
		this->matrix->getValues().getSize(),
		mat_descr,
		(const double*) this->matrix->getValues().getData(),
		this->matrix->getRowPointers().getData(),
		this->matrix->getColumnIndexes().getData(),
		(const double*) b.getData(),
		tol,
		reorder,
		(double*) x.getData(),
		&singularity );

		if( status != CUSOLVER_STATUS_SUCCESS ) {
		std::cerr << "cusolverSpDcsrlsvqr failed: " << status << std::endl;
		return false;
		}
		}

		cusparseDestroyMatDescr( mat_descr );
		cusolverSpDestroy( handle );

		return true;
		}
		};

		} // namespace Linear
		} // namespace Solvers
		} // namespace TNL

		#endif

src/Benchmarks/LinearSolvers/benchmarks.h

0 → 100644

+191 −0

Original line number	Diff line number	Diff line
		#pragma once

		#include <TNL/Pointers/SharedPointer.h>
		#include <TNL/Config/ParameterContainer.h>
		#include <TNL/Solvers/IterativeSolverMonitor.h>
		#include <TNL/DistributedContainers/DistributedMatrix.h>

		#include "../Benchmarks.h"

		#ifdef HAVE_ARMADILLO
		#include <armadillo>
		#include <TNL/Matrices/CSR.h>
		#endif

		#include <stdexcept> // std::runtime_error

		using namespace TNL;
		using namespace TNL::Pointers;
		using namespace TNL::Benchmarks;


		template< typename Device >
		const char*
		getPerformer()
		{
		if( std::is_same< Device, Devices::Cuda >::value )
		return "GPU";
		return "CPU";
		}

		template< typename Matrix >
		void barrier( const Matrix& matrix )
		{
		}

		template< typename Matrix, typename Communicator >
		void barrier( const DistributedContainers::DistributedMatrix< Matrix, Communicator >& matrix )
		{
		Communicator::Barrier( matrix.getCommunicationGroup() );
		}

		template< template<typename> class Preconditioner, typename Matrix >
		void
		benchmarkPreconditionerUpdate( Benchmark& benchmark,
		const Config::ParameterContainer& parameters,
		const SharedPointer< Matrix >& matrix )
		{
		barrier( matrix );
		const char* performer = getPerformer< typename Matrix::DeviceType >();
		Preconditioner< Matrix > preconditioner;
		preconditioner.setup( parameters );

		auto reset = []() {};
		auto compute = [&]() {
		preconditioner.update( matrix );
		barrier( matrix );
		};

		benchmark.time( reset, performer, compute );
		}

		template< template<typename> class Solver, template<typename> class Preconditioner, typename Matrix, typename Vector >
		void
		benchmarkSolver( Benchmark& benchmark,
		const Config::ParameterContainer& parameters,
		const SharedPointer< Matrix >& matrix,
		const Vector& x0,
		const Vector& b )
		{
		barrier( matrix );
		const char* performer = getPerformer< typename Matrix::DeviceType >();

		// setup
		Solver< Matrix > solver;
		solver.setup( parameters );
		solver.setMatrix( matrix );

		// FIXME: getMonitor returns solver monitor specialized for double and int
		solver.setSolverMonitor( benchmark.getMonitor() );

		auto pre = std::make_shared< Preconditioner< Matrix > >();
		pre->setup( parameters );
		solver.setPreconditioner( pre );
		// preconditioner update may throw if it's not implemented for CUDA
		try {
		pre->update( matrix );
		}
		catch ( const std::runtime_error& ) {}

		Vector x;
		x.setLike( x0 );

		// reset function
		auto reset = [&]() {
		x = x0;
		};

		// benchmark function
		auto compute = [&]() {
		const bool converged = solver.solve( b, x );
		barrier( matrix );
		if( ! converged )
		throw std::runtime_error("solver did not converge");
		};

		benchmark.time( reset, performer, compute );

		// TODO: add extra columns to the benchmark (iterations, preconditioned residue, true residue)
		// Vector r;
		// r.setLike( x );
		// matrix->vectorProduct( x, r );
		// r.addVector( b, 1.0, -1.0 );
		// const double trueResidue = r.lpNorm( 2.0 ) / b.lpNorm( 2.0 );

		// std::cout << "Converged: " << solver.checkConvergence() << ", iterations = " << solver.getIterations() << ", residue = " << solver.getResidue() << " " << std::endl;
		// std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
		// std::cout << outputPrefix
		// << "converged: " << std::setw( 5) << std::boolalpha << ( ! std::isnan(solver.getResidue()) && solver.getResidue() < solver.getConvergenceResidue() ) << " "
		// << "iterations = " << std::setw( 4) << solver.getIterations() << " "
		// << "preconditioned residue = " << std::setw(10) << solver.getResidue() << " "
		// << "true residue = " << std::setw(10) << trueResidue << " "
		// << "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl;
		}

		#ifdef HAVE_ARMADILLO
		// TODO: make a TNL solver like UmfpackWrapper
		template< typename Vector >
		void
		benchmarkArmadillo( const Config::ParameterContainer& parameters,
		const Pointers::SharedPointer< Matrices::CSR< double, Devices::Host, int > >& matrix,
		const Vector& x0,
		const Vector& b )
		{
		using namespace TNL;

		// copy matrix into Armadillo's class
		// sp_mat is in CSC format
		arma::uvec _colptr( matrix->getRowPointers().getSize() );
		for( int i = 0; i < matrix->getRowPointers().getSize(); i++ )
		_colptr[ i ] = matrix->getRowPointers()[ i ];
		arma::uvec _rowind( matrix->getColumnIndexes().getSize() );
		for( int i = 0; i < matrix->getColumnIndexes().getSize(); i++ )
		_rowind[ i ] = matrix->getColumnIndexes()[ i ];
		arma::vec _values( matrix->getValues().getData(), matrix->getValues().getSize() );
		arma::sp_mat AT( _rowind,
		_colptr,
		_values,
		matrix->getColumns(),
		matrix->getRows() );
		arma::sp_mat A = AT.t();

		Vector x;
		x.setLike( x0 );

		// Armadillo vector using the same memory as x (with copy_aux_mem=false, strict=true)
		arma::vec arma_x( x.getData(), x.getSize(), false, true );
		arma::vec arma_b( b.getData(), b.getSize() );

		arma::superlu_opts settings;
		// settings.equilibrate = false;
		settings.equilibrate = true;
		settings.pivot_thresh = 1.0;
		// settings.permutation = arma::superlu_opts::COLAMD;
		settings.permutation = arma::superlu_opts::MMD_AT_PLUS_A;
		// settings.refine = arma::superlu_opts::REF_DOUBLE;
		settings.refine = arma::superlu_opts::REF_NONE;

		// reset function
		auto reset = [&]() {
		x = x0;
		};

		// benchmark function
		auto compute = [&]() {
		const bool converged = arma::spsolve( arma_x, A, arma_b, "superlu", settings );
		if( ! converged )
		throw std::runtime_error("solver did not converge");
		};

		const int loops = parameters.getParameter< int >( "loops" );
		double time = timeFunction( compute, reset, loops );

		arma::vec r = A * arma_x - arma_b;
		// std::cout << "Converged: " << (time > 0) << ", residue = " << arma::norm( r ) / arma::norm( arma_b ) << " " << std::endl;
		// std::cout << "Mean time: " << time / loops << " seconds." << std::endl;
		std::cout << "Converged: " << std::setw( 5) << std::boolalpha << (time > 0) << " "
		<< "iterations = " << std::setw( 4) << "N/A" << " "
		<< "residue = " << std::setw(10) << arma::norm( r ) / arma::norm( arma_b ) << " "
		<< "mean time = " << std::setw( 9) << time / loops << " seconds." << std::endl;
		}
		#endif

src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp

+0 −10

Original line number	Diff line number	Diff line
		/***************************************************************************
		tnl-benchmark-linear-solvers.cpp - description
		-------------------
		begin : Jun 22, 2014
		copyright : (C) 2014 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#include "tnl-benchmark-linear-solvers.h"