diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ef868cc7622d04150a5358e5b6506e98b03b11e..68252ba6a74c45adc73dbd4e18b0afb5c81e5a67 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,11 +25,11 @@ option(WITH_GMP "Build with GMP support" OFF) option(WITH_TESTS "Build tests" ON) option(WITH_PROFILING "Enable code profiling compiler flags" OFF ) option(WITH_COVERAGE "Enable code coverage reports from unit tests" OFF) -option(WITH_EXAMPLES "Compile the 'examples' directory" ON) +option(WITH_EXAMPLES "Compile the 'src/Examples' directory" ON) option(WITH_TOOLS "Compile the 'src/Tools' directory" ON) option(WITH_BENCHMARKS "Compile the 'src/Benchmarks' directory" ON) option(WITH_PYTHON "Compile the Python bindings" ON) -option(WITH_DOC "Generate documentation" ON) +option(WITH_DOC "Build examples included in the documentation" ON) # install paths relative to the cmake's prefix set( TNL_TARGET_INCLUDE_DIRECTORY "include/TNL" ) @@ -81,10 +81,10 @@ set( CMAKE_CXX_STANDARD 14 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) -# set Debug/Release options +# set default build options set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) -set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) +set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "" ) set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "-rdynamic" ) @@ -96,6 +96,13 @@ set( CMAKE_SHARED_LINKER_FLAGS "" ) set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-rdynamic" ) set( CMAKE_SHARED_LINKER_FLAGS_RELEASE "" ) +# set additional Debug/Release options using generator expressions +# (that way we can exclude some options for specific targets, see https://stackoverflow.com/a/59734798 for details) +add_compile_options( + # GOTCHA: CMake uses semicolons as list item separator, spaces would lead to a single argument inside double-quotes on the command line + "$<$:-march=native;-mtune=native>" +) + # disable GCC's infamous "maybe-uninitialized" warning (it produces mostly false positives) if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized" ) @@ -300,7 +307,7 @@ add_subdirectory( src ) add_subdirectory( share ) # Add subdirectories for examples included in the documentation -if( ${WITH_DOC} OR ${WITH_EXAMPLES} ) +if( ${WITH_DOC} ) set( TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH "${CMAKE_SOURCE_DIR}/Documentation/output_snippets" ) file(MAKE_DIRECTORY ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}) add_subdirectory( Documentation/Examples ) diff --git a/build b/build index 68966f0f423d13fd2cfb69a380fe02ea9bc25e8f..76169a7c4b20cdc8c6750c23abd7aa6fb675c5e6 100755 --- a/build +++ b/build @@ -87,10 +87,11 @@ if [[ ${HELP} == "yes" ]]; then echo " --with-profiling=yes/no Enables code profiling compiler falgs. 'no' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-doc=yes/no Build documentation. 'yes' by default." - echo " --with-examples=yes/no Compile the 'examples' directory. 'yes' by default." + echo " --with-examples=yes/no Compile the 'src/Examples' directory. 'yes' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." + echo " --with-doc=yes/no Generate the documentation. 'yes' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." echo " --root-dir=PATH Path to the TNL source code root dir." diff --git a/src/Benchmarks/BLAS/CMakeLists.txt b/src/Benchmarks/BLAS/CMakeLists.txt index 3f040abc893464efb196d5a40ef1e1f3edbcbbab..81d83753329658c960b47a08d3f1ac58881bd9ff 100644 --- a/src/Benchmarks/BLAS/CMakeLists.txt +++ b/src/Benchmarks/BLAS/CMakeLists.txt @@ -5,16 +5,28 @@ else() add_executable( tnl-benchmark-blas tnl-benchmark-blas.cpp ) endif() -find_library( CBLAS_LIBRARY NAMES cblas - PATHS /usr/lib - /usr/lib64 - /usr/lib/x86_64-linux-gnu - /usr/local/lib - /usr/local/lib64 ) +find_library( CBLAS_LIBRARY NAMES cblas ) + +# fallback for Centos 7.5 - libcblas.so does not exist, link to libtatlas.so or libsatlas.so +# https://forums.centos.org/viewtopic.php?t=48543 +find_library( TATLAS_LIBRARY NAMES tatlas + PATH_SUFFIXES atlas ) +find_library( SATLAS_LIBRARY NAMES satlas + PATH_SUFFIXES atlas ) + if( CBLAS_LIBRARY ) target_compile_definitions( tnl-benchmark-blas PUBLIC "-DHAVE_BLAS" ) target_link_libraries( tnl-benchmark-blas ${CBLAS_LIBRARY} ) +elseif( TATLAS_LIBRARY ) + target_compile_definitions( tnl-benchmark-blas PUBLIC "-DHAVE_BLAS" ) + target_link_libraries( tnl-benchmark-blas ${TATLAS_LIBRARY} ) +elseif( SATLAS_LIBRARY ) + target_compile_definitions( tnl-benchmark-blas PUBLIC "-DHAVE_BLAS" ) + target_link_libraries( tnl-benchmark-blas ${SATLAS_LIBRARY} ) else() + # FIXME: We require the CBLAS interface, but CMake's FindBLAS cannot detect that, + # so this fails unless the BLAS implementation includes it in the same + # shared library file as the Fortran implementation (e.g. OpenBLAS does that). find_package( BLAS ) if( BLAS_FOUND ) target_compile_definitions( tnl-benchmark-blas PUBLIC "-DHAVE_BLAS" ) diff --git a/src/Benchmarks/BLAS/blasWrappers.h b/src/Benchmarks/BLAS/blasWrappers.h index d1e0edff119c733126db27cdcc913cd189d4468e..ce30060bf399fa80c8119dc185edb84a5e3010ca 100644 --- a/src/Benchmarks/BLAS/blasWrappers.h +++ b/src/Benchmarks/BLAS/blasWrappers.h @@ -2,7 +2,13 @@ #ifdef HAVE_BLAS +// HOTFIX: cblas.h from the atlas-devel package (version 3.10.1-12.el7) on CentOS 7 +// does not declare the functions as `extern "C"`, which breaks name mangling. +// Note that nested `extern "C"` is valid and correct: +// https://stackoverflow.com/questions/48099828/what-happens-if-you-nest-extern-c +extern "C" { #include +} inline int blasIgamax( int n, const float *x, int incx ) { diff --git a/src/Benchmarks/LinearSolvers/benchmarks.h b/src/Benchmarks/LinearSolvers/benchmarks.h index 577907c78ab13253c6c46c9db4d182b00d11957e..7b22cdfc1ef30aaddc29cc35067962ed4530f257 100644 --- a/src/Benchmarks/LinearSolvers/benchmarks.h +++ b/src/Benchmarks/LinearSolvers/benchmarks.h @@ -145,7 +145,7 @@ benchmarkSolver( Benchmark& benchmark, virtual HeaderElements getTableHeader() const override { - return HeaderElements({"time", "speedup", "converged", "iterations", "residue_precond", "residue_true"}); + return HeaderElements({"time", "stddev", "stddev/time", "speedup", "converged", "iterations", "residue_precond", "residue_true"}); } virtual RowElements getRowElements() const override @@ -160,7 +160,7 @@ benchmarkSolver( Benchmark& benchmark, r = b - r; const double residue_true = lpNorm( r, 2.0 ) / lpNorm( b, 2.0 ); - return RowElements({ time, speedup, (double) converged, (double) iterations, + return RowElements({ time, stddev, stddev/time, speedup, (double) converged, (double) iterations, residue_precond, residue_true }); } }; diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 0701b647a42416e6439cd02a1ef10157512210f3..4aabf39cd4bae98bc411fcc95feef56672b039ca 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifndef NDEBUG #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -68,12 +70,19 @@ using CommunicatorType = Communicators::NoDistrCommunicator; static const std::set< std::string > valid_solvers = { "gmres", - "cwygmres", "tfqmr", "bicgstab", "bicgstab-ell", }; +static const std::set< std::string > valid_gmres_variants = { + "CGS", + "CGSR", + "MGS", + "MGSR", + "CWY", +}; + static const std::set< std::string > valid_preconditioners = { "jacobi", "ilu0", @@ -85,19 +94,19 @@ parse_comma_list( const Config::ParameterContainer& parameters, const char* parameter, const std::set< std::string >& options ) { - const String solvers = parameters.getParameter< String >( parameter ); + const String param = parameters.getParameter< String >( parameter ); - if( solvers == "all" ) + if( param == "all" ) return options; - std::stringstream ss( solvers.getString() ); + std::stringstream ss( param.getString() ); std::string s; std::set< std::string > set; while( std::getline( ss, s, ',' ) ) { if( ! options.count( s ) ) throw std::logic_error( std::string("Invalid value in the comma-separated list for the parameter '") - + parameter + "': '" + s + "'. The list contains: '" + solvers.getString() + "'." ); + + parameter + "': '" + s + "'. The list contains: '" + param.getString() + "'." ); set.insert( s ); @@ -108,6 +117,42 @@ parse_comma_list( const Config::ParameterContainer& parameters, return set; } +// TODO: implement this in TNL::String +bool ends_with( const std::string& value, const std::string& ending ) +{ + if (ending.size() > value.size()) + return false; + return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); +} + +// initialize all vector entries with a unioformly distributed random value from the interval [a, b] +template< typename Vector > +void set_random_vector( Vector& v, typename Vector::RealType a, typename Vector::RealType b ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + // random device will be used to obtain a seed for the random number engine + std::random_device rd; + // initialize the standard mersenne_twister_engine with rd() as the seed + std::mt19937 gen(rd()); + // create uniform distribution + std::uniform_real_distribution< RealType > dis(a, b); + + // create host vector + typename Vector::template Self< RealType, Devices::Host > host_v; + host_v.setSize( v.getSize() ); + + // initialize the host vector + auto kernel = [&] ( IndexType i ) + { + host_v[ i ] = dis(gen); + }; + Algorithms::ParallelFor< Devices::Host >::exec( (IndexType) 0, host_v.getSize(), kernel ); + + // copy the data to the device vector + v = host_v; +} + template< typename Matrix, typename Vector > void benchmarkIterativeSolvers( Benchmark& benchmark, @@ -138,6 +183,7 @@ benchmarkIterativeSolvers( Benchmark& benchmark, const int ell_max = 2; const std::set< std::string > solvers = parse_comma_list( parameters, "solvers", valid_solvers ); + const std::set< std::string > gmresVariants = parse_comma_list( parameters, "gmres-variants", valid_gmres_variants ); const std::set< std::string > preconditioners = parse_comma_list( parameters, "preconditioners", valid_preconditioners ); const bool with_preconditioner_update = parameters.getParameter< bool >( "with-preconditioner-update" ); @@ -151,21 +197,14 @@ benchmarkIterativeSolvers( Benchmark& benchmark, } if( solvers.count( "gmres" ) ) { - benchmark.setOperation("GMRES (Jacobi)"); - parameters.template setParameter< String >( "gmres-variant", "MGSR" ); - benchmarkSolver< GMRES, Diagonal >( benchmark, parameters, matrixPointer, x0, b ); - #ifdef HAVE_CUDA - benchmarkSolver< GMRES, Diagonal >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); - #endif - } - - if( solvers.count( "cwygmres" ) ) { - benchmark.setOperation("CWYGMRES (Jacobi)"); - parameters.template setParameter< String >( "gmres-variant", "CWY" ); - benchmarkSolver< GMRES, Diagonal >( benchmark, parameters, matrixPointer, x0, b ); - #ifdef HAVE_CUDA - benchmarkSolver< GMRES, Diagonal >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); - #endif + for( auto variant : gmresVariants ) { + benchmark.setOperation(variant + "-GMRES (Jacobi)"); + parameters.template setParameter< String >( "gmres-variant", variant.c_str() ); + benchmarkSolver< GMRES, Diagonal >( benchmark, parameters, matrixPointer, x0, b ); + #ifdef HAVE_CUDA + benchmarkSolver< GMRES, Diagonal >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); + #endif + } } if( solvers.count( "tfqmr" ) ) { @@ -208,21 +247,14 @@ benchmarkIterativeSolvers( Benchmark& benchmark, } if( solvers.count( "gmres" ) ) { - benchmark.setOperation("GMRES (ILU0)"); - parameters.template setParameter< String >( "gmres-variant", "MGSR" ); - benchmarkSolver< GMRES, ILU0 >( benchmark, parameters, matrixPointer, x0, b ); - #ifdef HAVE_CUDA - benchmarkSolver< GMRES, ILU0 >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); - #endif - } - - if( solvers.count( "cwygmres" ) ) { - benchmark.setOperation("CWYGMRES (ILU0)"); - parameters.template setParameter< String >( "gmres-variant", "CWY" ); - benchmarkSolver< GMRES, ILU0 >( benchmark, parameters, matrixPointer, x0, b ); - #ifdef HAVE_CUDA - benchmarkSolver< GMRES, ILU0 >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); - #endif + for( auto variant : gmresVariants ) { + benchmark.setOperation(variant + "-GMRES (ILU0)"); + parameters.template setParameter< String >( "gmres-variant", variant.c_str() ); + benchmarkSolver< GMRES, ILU0 >( benchmark, parameters, matrixPointer, x0, b ); + #ifdef HAVE_CUDA + benchmarkSolver< GMRES, ILU0 >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); + #endif + } } if( solvers.count( "tfqmr" ) ) { @@ -264,21 +296,14 @@ benchmarkIterativeSolvers( Benchmark& benchmark, } if( solvers.count( "gmres" ) ) { - benchmark.setOperation("GMRES (ILUT)"); - parameters.template setParameter< String >( "gmres-variant", "MGSR" ); - benchmarkSolver< GMRES, ILUT >( benchmark, parameters, matrixPointer, x0, b ); - #ifdef HAVE_CUDA - benchmarkSolver< GMRES, ILUT >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); - #endif - } - - if( solvers.count( "cwygmres" ) ) { - benchmark.setOperation("CWYGMRES (ILUT)"); - parameters.template setParameter< String >( "gmres-variant", "CWY" ); - benchmarkSolver< GMRES, ILUT >( benchmark, parameters, matrixPointer, x0, b ); - #ifdef HAVE_CUDA - benchmarkSolver< GMRES, ILUT >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); - #endif + for( auto variant : gmresVariants ) { + benchmark.setOperation(variant + "-GMRES (ILUT)"); + parameters.template setParameter< String >( "gmres-variant", variant.c_str() ); + benchmarkSolver< GMRES, ILUT >( benchmark, parameters, matrixPointer, x0, b ); + #ifdef HAVE_CUDA + benchmarkSolver< GMRES, ILUT >( benchmark, parameters, cudaMatrixPointer, cuda_x0, cuda_b ); + #endif + } } if( solvers.count( "tfqmr" ) ) { @@ -330,11 +355,42 @@ struct LinearSolversBenchmark // const Config::ParameterContainer& parameters ) Config::ParameterContainer& parameters ) { + const String file_matrix = parameters.getParameter< String >( "input-matrix" ); + const String file_dof = parameters.getParameter< String >( "input-dof" ); + const String file_rhs = parameters.getParameter< String >( "input-rhs" ); + SharedPointer< MatrixType > matrixPointer; VectorType x0, b; - matrixPointer->load( parameters.getParameter< String >( "input-matrix" ) ); - File( parameters.getParameter< String >( "input-dof" ), std::ios_base::in ) >> x0; - File( parameters.getParameter< String >( "input-rhs" ), std::ios_base::in ) >> b; + + // load the matrix + if( ends_with( file_matrix, ".mtx" ) ) { + Matrices::MatrixReader< MatrixType > reader; + if( ! reader.readMtxFile( file_matrix, *matrixPointer ) ) + return false; + } + else { + matrixPointer->load( file_matrix ); + } + + // load the vectors + if( file_dof && file_rhs ) { + File( file_dof, std::ios_base::in ) >> x0; + File( file_rhs, std::ios_base::in ) >> b; + } + else { + // set x0 := 0 + x0.setSize( matrixPointer->getColumns() ); + x0 = 0; + + // generate random vector x + VectorType x; + x.setSize( matrixPointer->getColumns() ); + set_random_vector( x, 1e2, 1e3 ); + + // set b := A*x + b.setSize( matrixPointer->getRows() ); + matrixPointer->vectorProduct( x, b ); + } typename MatrixType::CompressedRowLengthsVector rowLengths; matrixPointer->getCompressedRowLengths( rowLengths ); @@ -488,14 +544,15 @@ configSetup( Config::ConfigDescription& config ) config.addEntryEnum( "append" ); config.addEntryEnum( "overwrite" ); config.addEntry< int >( "loops", "Number of repetitions of the benchmark.", 10 ); - config.addRequiredEntry< String >( "input-matrix", "File name of the input matrix (in binary TNL format)." ); - config.addRequiredEntry< String >( "input-dof", "File name of the input DOF vector (in binary TNL format)." ); - config.addRequiredEntry< String >( "input-rhs", "File name of the input right-hand-side vector (in binary TNL format)." ); + config.addRequiredEntry< String >( "input-matrix", "File name of the input matrix (in binary TNL format or textual MTX format)." ); + config.addEntry< String >( "input-dof", "File name of the input DOF vector (in binary TNL format).", "" ); + config.addEntry< String >( "input-rhs", "File name of the input right-hand-side vector (in binary TNL format).", "" ); config.addEntry< String >( "name", "Name of the matrix in the benchmark.", "" ); config.addEntry< int >( "verbose", "Verbose mode.", 1 ); config.addEntry< bool >( "reorder-dofs", "Reorder matrix entries corresponding to the same DOF together.", false ); config.addEntry< bool >( "with-direct", "Includes the 3rd party direct solvers in the benchmark.", false ); - config.addEntry< String >( "solvers", "Comma-separated list of solvers to run benchmarks for. Options: gmres, cwygmres, tfqmr, bicgstab, bicgstab-ell.", "all" ); + config.addEntry< String >( "solvers", "Comma-separated list of solvers to run benchmarks for. Options: gmres, tfqmr, bicgstab, bicgstab-ell.", "all" ); + config.addEntry< String >( "gmres-variants", "Comma-separated list of GMRES variants to run benchmarks for. Options: CGS, CGSR, MGS, MGSR, CWY.", "all" ); config.addEntry< String >( "preconditioners", "Comma-separated list of preconditioners to run benchmarks for. Options: jacobi, ilu0, ilut.", "all" ); config.addEntry< bool >( "with-preconditioner-update", "Run benchmark for the preconditioner update.", true ); config.addEntry< String >( "devices", "Run benchmarks on these devices.", "all" ); diff --git a/src/Python/pytnl/tnl/CMakeLists.txt b/src/Python/pytnl/tnl/CMakeLists.txt index b6c4182b88d7f8c4d1f2ca9acb8478daec0bd45e..de405e5e50c0549e2cf846b06a82d1acb07d9414 100644 --- a/src/Python/pytnl/tnl/CMakeLists.txt +++ b/src/Python/pytnl/tnl/CMakeLists.txt @@ -16,6 +16,16 @@ pybind11_add_module( pytnl ${sources} ) # rename the shared library to tnl.cpython-XXm-x86_64-linux-gnu.so set_target_properties( pytnl PROPERTIES LIBRARY_OUTPUT_NAME tnl ) +# Skip -march=native -mtune=native for pytnl - optimizing python bindings for +# a specific architecture is not very useful and prevents using Python tools on +# hybrid clusters. +get_target_property( pytnl_COMPILE_OPTIONS pytnl COMPILE_OPTIONS ) +if( pytnl_COMPILE_OPTIONS ) + string( REPLACE "-march=native" "" pytnl_COMPILE_OPTIONS "${pytnl_COMPILE_OPTIONS}" ) + string( REPLACE "-mtune=native" "" pytnl_COMPILE_OPTIONS "${pytnl_COMPILE_OPTIONS}" ) + set_target_properties( pytnl PROPERTIES COMPILE_OPTIONS "${pytnl_COMPILE_OPTIONS}" ) +endif() + # We have bindings for unsafe objects (e.g. Array::operator[]) where assertion # is the only safeguard, so we need to translate the TNL::AssertionError to # Python's AssertionError. diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index dd6ddc07245150c859946e97b2fde3a66021aaa3..0415d5f8d1d60af87f82a6efff5f2a5ea4d18ab7 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -165,7 +165,7 @@ template< typename Matrix > bool MatrixReader< Matrix >::checkMtxHeader( const String& header, bool& symmetric ) { - std::vector< String > parsedLine = header.split(); + std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() < 5 ) return false; if( parsedLine[ 0 ] != "%%MatrixMarket" ) @@ -231,7 +231,7 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, return false; } - parsedLine = line.split(); + parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() != 3 ) { std::cerr << "Wrong number of parameters in the matrix header." << std::endl; @@ -389,7 +389,7 @@ bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, IndexType& column, RealType& value ) { - std::vector< String > parsedLine = line.split(); + std::vector< String > parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() != 3 ) { std::cerr << "Wrong number of parameters in the matrix row at line:" << line << std::endl; diff --git a/src/TNL/Solvers/IterativeSolver.h b/src/TNL/Solvers/IterativeSolver.h index 6550e5e497c461d26879ca6eeb3e9e989e04ab07..75487fa0197a51b79897384de3b12ecec12c667b 100644 --- a/src/TNL/Solvers/IterativeSolver.h +++ b/src/TNL/Solvers/IterativeSolver.h @@ -2,7 +2,7 @@ IterativeSolver.h - description ------------------- begin : Oct 19, 2012 - copyright : (C) 2012 by Tomas Oberhuber + copyright : (C) 2012 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -10,6 +10,8 @@ #pragma once +#include + #include #include #include @@ -22,17 +24,16 @@ template< typename Real, typename SolverMonitor = IterativeSolverMonitor< Real, Index > > class IterativeSolver { - public: +public: + using SolverMonitorType = SolverMonitor; - using SolverMonitorType = SolverMonitor; - - IterativeSolver(); + IterativeSolver() = default; static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); bool setup( const Config::ParameterContainer& parameters, - const String& prefix = "" ); + const String& prefix = "" ); void setMaxIterations( const Index& maxIterations ); @@ -70,27 +71,27 @@ class IterativeSolver void refreshSolverMonitor( bool force = false ); +protected: + Index maxIterations = 1000000000; - protected: + Index minIterations = 0; - Index maxIterations; + Index currentIteration = 0; - Index minIterations; + Real convergenceResidue = 1e-6; - Index currentIteration; + // If the current residue is greater than divergenceResidue, the solver is stopped. + Real divergenceResidue = std::numeric_limits< float >::max(); - Real convergenceResidue; + Real currentResidue = 0; - /**** - * If the current residue is over divergenceResidue the solver is stopped. - */ - Real divergenceResidue; + SolverMonitor* solverMonitor = nullptr; - Real currentResidue; + Index refreshRate = 1; - SolverMonitor* solverMonitor; + String residualHistoryFileName = ""; - Index refreshRate; + std::ofstream residualHistoryFile; }; } // namespace Solvers diff --git a/src/TNL/Solvers/IterativeSolver_impl.h b/src/TNL/Solvers/IterativeSolver_impl.h index 528a08ce6febb0a55a0c8b127d9aa90a925f2c18..8f28e8847f52ede82e68d6117312638c499fac5b 100644 --- a/src/TNL/Solvers/IterativeSolver_impl.h +++ b/src/TNL/Solvers/IterativeSolver_impl.h @@ -2,7 +2,7 @@ IterativeSolver_impl.h - description ------------------- begin : Oct 19, 2012 - copyright : (C) 2012 by Tomas Oberhuber + copyright : (C) 2012 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -11,34 +11,21 @@ #pragma once #include -#include -#include #include "IterativeSolver.h" namespace TNL { -namespace Solvers { +namespace Solvers { template< typename Real, typename Index, typename SolverMonitor > -IterativeSolver< Real, Index, SolverMonitor >::IterativeSolver() -: maxIterations( 1000000000 ), - minIterations( 0 ), - currentIteration( 0 ), - convergenceResidue( 1.0e-6 ), - divergenceResidue( DBL_MAX ), - currentResidue( 0 ), - solverMonitor( 0 ), - refreshRate( 1 ) -{ -}; - -template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::configSetup( Config::ConfigDescription& config, - const String& prefix ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +configSetup( Config::ConfigDescription& config, + const String& prefix ) { config.addEntry< int > ( prefix + "max-iterations", "Maximal number of iterations the solver may perform.", 1000000000 ); config.addEntry< int > ( prefix + "min-iterations", "Minimal number of iterations the solver must perform.", 0 ); - + // The default value for the convergence residue MUST be zero since not in all problems we want to stop the solver // when we reach a state near a steady state. This can be only temporary if, for example, when the boundary conditions // are time dependent (growing velocity at inlet starting from 0). @@ -46,56 +33,74 @@ void IterativeSolver< Real, Index, SolverMonitor >::configSetup( Config::ConfigD config.addEntry< double >( prefix + "divergence-residue", "Divergence occurs when the residue exceeds given limit.", std::numeric_limits< float >::max() ); // TODO: setting refresh rate should be done in SolverStarter::setup (it's not a parameter of the IterativeSolver) config.addEntry< int > ( prefix + "refresh-rate", "Number of iterations between solver monitor refreshes.", 1 ); + + config.addEntry< String >( prefix + "residual-history-file", "Path to the file where the residual history will be saved.", "" ); } template< typename Real, typename Index, typename SolverMonitor > -bool IterativeSolver< Real, Index, SolverMonitor >::setup( const Config::ParameterContainer& parameters, - const String& prefix ) +bool +IterativeSolver< Real, Index, SolverMonitor >:: +setup( const Config::ParameterContainer& parameters, + const String& prefix ) { - this->setMaxIterations( parameters.getParameter< int >( "max-iterations" ) ); - this->setMinIterations( parameters.getParameter< int >( "min-iterations" ) ); - this->setConvergenceResidue( parameters.getParameter< double >( "convergence-residue" ) ); - this->setDivergenceResidue( parameters.getParameter< double >( "divergence-residue" ) ); + this->setMaxIterations( parameters.getParameter< int >( prefix + "max-iterations" ) ); + this->setMinIterations( parameters.getParameter< int >( prefix + "min-iterations" ) ); + this->setConvergenceResidue( parameters.getParameter< double >( prefix + "convergence-residue" ) ); + this->setDivergenceResidue( parameters.getParameter< double >( prefix + "divergence-residue" ) ); // TODO: setting refresh rate should be done in SolverStarter::setup (it's not a parameter of the IterativeSolver) - this->setRefreshRate( parameters.getParameter< int >( "refresh-rate" ) ); + this->setRefreshRate( parameters.getParameter< int >( prefix + "refresh-rate" ) ); + this->residualHistoryFileName = parameters.getParameter< String >( prefix + "residual-history-file" ); + if( this->residualHistoryFileName ) + this->residualHistoryFile.open( this->residualHistoryFileName.getString() ); return true; } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setMaxIterations( const Index& maxIterations ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setMaxIterations( const Index& maxIterations ) { this->maxIterations = maxIterations; } template< typename Real, typename Index, typename SolverMonitor > -const Index& IterativeSolver< Real, Index, SolverMonitor >::getMaxIterations() const +const Index& +IterativeSolver< Real, Index, SolverMonitor >:: +getMaxIterations() const { return this->maxIterations; } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setMinIterations( const Index& minIterations ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setMinIterations( const Index& minIterations ) { this->minIterations = minIterations; } template< typename Real, typename Index, typename SolverMonitor > -const Index& IterativeSolver< Real, Index, SolverMonitor >::getMinIterations() const +const Index& +IterativeSolver< Real, Index, SolverMonitor >:: +getMinIterations() const { return this->minIterations; } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::resetIterations() +void +IterativeSolver< Real, Index, SolverMonitor >:: +resetIterations() { this->currentIteration = 0; if( this->solverMonitor ) this->solverMonitor->setIterations( 0 ); - } template< typename Real, typename Index, typename SolverMonitor > -bool IterativeSolver< Real, Index, SolverMonitor >::nextIteration() +bool +IterativeSolver< Real, Index, SolverMonitor >:: +nextIteration() { // this->checkNextIteration() must be called before the iteration counter is incremented bool result = this->checkNextIteration(); @@ -108,7 +113,9 @@ bool IterativeSolver< Real, Index, SolverMonitor >::nextIteration() } template< typename Real, typename Index, typename SolverMonitor > -bool IterativeSolver< Real, Index, SolverMonitor >::checkNextIteration() +bool +IterativeSolver< Real, Index, SolverMonitor >:: +checkNextIteration() { this->refreshSolverMonitor(); @@ -116,7 +123,7 @@ bool IterativeSolver< Real, Index, SolverMonitor >::checkNextIteration() this->getIterations() > this->getMaxIterations() || ( this->getResidue() > this->getDivergenceResidue() && this->getIterations() >= this->getMinIterations() ) || ( this->getResidue() < this->getConvergenceResidue() && this->getIterations() >= this->getMinIterations() ) ) - return false; + return false; return true; } @@ -158,59 +165,81 @@ getIterations() const } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setConvergenceResidue( const Real& convergenceResidue ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setConvergenceResidue( const Real& convergenceResidue ) { this->convergenceResidue = convergenceResidue; } template< typename Real, typename Index, typename SolverMonitor > -const Real& IterativeSolver< Real, Index, SolverMonitor >::getConvergenceResidue() const +const Real& +IterativeSolver< Real, Index, SolverMonitor >:: +getConvergenceResidue() const { return this->convergenceResidue; } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setDivergenceResidue( const Real& divergenceResidue ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setDivergenceResidue( const Real& divergenceResidue ) { this->divergenceResidue = divergenceResidue; } template< typename Real, typename Index, typename SolverMonitor > -const Real& IterativeSolver< Real, Index, SolverMonitor >::getDivergenceResidue() const +const Real& +IterativeSolver< Real, Index, SolverMonitor >:: +getDivergenceResidue() const { return this->divergenceResidue; } - template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setResidue( const Real& residue ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setResidue( const Real& residue ) { this->currentResidue = residue; if( this->solverMonitor ) this->solverMonitor->setResidue( this->getResidue() ); + if( this->residualHistoryFile ) { + if( this->getIterations() == 0 ) + this->residualHistoryFile << "\n"; + this->residualHistoryFile << this->getIterations() << "\t" << std::scientific << residue << std::endl; + } } template< typename Real, typename Index, typename SolverMonitor > -const Real& IterativeSolver< Real, Index, SolverMonitor >::getResidue() const +const Real& +IterativeSolver< Real, Index, SolverMonitor >:: +getResidue() const { return this->currentResidue; } // TODO: setting refresh rate should be done in SolverStarter::setup (it's not a parameter of the IterativeSolver) template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setRefreshRate( const Index& refreshRate ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setRefreshRate( const Index& refreshRate ) { this->refreshRate = refreshRate; } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::setSolverMonitor( SolverMonitorType& solverMonitor ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +setSolverMonitor( SolverMonitorType& solverMonitor ) { this->solverMonitor = &solverMonitor; } template< typename Real, typename Index, typename SolverMonitor > -void IterativeSolver< Real, Index, SolverMonitor >::refreshSolverMonitor( bool force ) +void +IterativeSolver< Real, Index, SolverMonitor >:: +refreshSolverMonitor( bool force ) { if( this->solverMonitor ) { diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h index f1a4b87328a630411c634bed6744494f76afea02..e1c02f0ab5eadbaea7ba2c2678641b0c1a05ee6b 100644 --- a/src/TNL/Solvers/Linear/GMRES.h +++ b/src/TNL/Solvers/Linear/GMRES.h @@ -53,7 +53,16 @@ protected: using HostView = typename DeviceView::template Self< RealType, Devices::Host >; using HostVector = typename DeviceVector::template Self< RealType, Devices::Host >;; - enum class Variant { MGS, MGSR, CWY }; + enum class Variant { CGS, CGSR, MGS, MGSR, CWY }; + +// nvcc allows __cuda_callable__ lambdas only in public methods +#ifdef __NVCC__ +public: +#endif + int orthogonalize_CGS( const int m, const RealType normb, const RealType beta ); +#ifdef __NVCC__ +protected: +#endif int orthogonalize_MGS( const int m, const RealType normb, const RealType beta ); diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index d6cb8fdd095120c35cbf303a734ed7c5667bb79d..8e653e28b2d783e4a342f899eb794554d11c131a 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -31,6 +31,8 @@ configSetup( Config::ConfigDescription& config, const String& prefix ) { config.addEntry< String >( prefix + "gmres-variant", "Minimal number of iterations after which the GMRES restarts.", "CWY" ); + config.addEntryEnum( "CGS" ); + config.addEntryEnum( "CGSR" ); config.addEntryEnum( "MGS" ); config.addEntryEnum( "MGSR" ); config.addEntryEnum( "CWY" ); @@ -47,7 +49,11 @@ setup( const Config::ParameterContainer& parameters, const String& prefix ) { const String var = parameters.getParameter< String >( prefix + "gmres-variant" ); - if( var == "MGS" ) + if( var == "CGS" ) + variant = Variant::CGS; + else if( var == "CGSR" ) + variant = Variant::CGSR; + else if( var == "MGS" ) variant = Variant::MGS; else if( var == "MGSR" ) variant = Variant::MGSR; @@ -133,6 +139,10 @@ solve( ConstVectorViewType b, VectorViewType x ) // orthogonalization int o_steps = 0; switch( variant ) { + case Variant::CGS: + case Variant::CGSR: + o_steps = orthogonalize_CGS( m, normb, beta ); + break; case Variant::MGS: case Variant::MGSR: o_steps = orthogonalize_MGS( m, normb, beta ); @@ -167,6 +177,102 @@ solve( ConstVectorViewType b, VectorViewType x ) return this->checkConvergence(); } +template< typename Matrix > +int +GMRES< Matrix >:: +orthogonalize_CGS( const int m, const RealType normb, const RealType beta ) +{ + // initial binding to _M_tmp sets the correct local range, global size and + // communication group for distributed views + VectorViewType v_i( _M_tmp.getView() ); + VectorViewType v_k( _M_tmp.getView() ); + + /*** + * v_0 = r / | r | = 1.0 / beta * r + */ + v_i.bind( V.getData(), size ); + v_i = (1.0 / beta) * r; + + H.setValue( 0.0 ); + s.setValue( 0.0 ); + s[ 0 ] = beta; + + /**** + * Starting m-loop + */ + for( int i = 0; i < m && this->nextIteration(); i++ ) { + v_i.bind( &V.getData()[ i * ldSize ], size ); + /**** + * Solve w from M w = A v_i + */ + preconditioned_matvec( w, v_i ); + + for( int k = 0; k <= i; k++ ) + H[ k + i * (m + 1) ] = 0.0; + const int reorthogonalize = (variant == Variant::CGSR) ? 2 : 1; + for( int l = 0; l < reorthogonalize; l++ ) { + // auxiliary array for the H coefficients of the current l-loop + RealType H_l[i + 1]; + + // CGS part 1: compute projection coefficients +// for( int k = 0; k <= i; k++ ) { +// v_k.bind( &V.getData()[ k * ldSize ], size ); +// H_l[k] = (w, v_k); +// H[ k + i * (m + 1) ] += H_l[k]; +// } + // H_l = V_i^T * w + const RealType* _V = V.getData(); + const RealType* _w = Traits::getConstLocalView( w ).getData(); + const IndexType ldSize = this->ldSize; + auto fetch = [_V, _w, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _V[ idx + k * ldSize ] * _w[ idx ]; }; + Algorithms::Multireduction< DeviceType >::reduce + ( (RealType) 0, + fetch, + std::plus<>{}, + size, + i + 1, + H_l ); + for( int k = 0; k <= i; k++ ) + H[ k + i * (m + 1) ] += H_l[k]; + + // CGS part 2: subtract the projections +// for( int k = 0; k <= i; k++ ) { +// v_k.bind( &V.getData()[ k * ldSize ], size ); +// w = w - H_l[k] * v_k; +// } + // w := w - V_i * H_l + Matrices::MatrixOperations< DeviceType >:: + gemv( size, i + 1, + -1.0, V.getData(), ldSize, H_l, + 1.0, Traits::getLocalView( w ).getData() ); + } + /*** + * H_{i+1,i} = |w| + */ + RealType normw = lpNorm( w, 2.0 ); + H[ i + 1 + i * (m + 1) ] = normw; + + /*** + * v_{i+1} = w / |w| + */ + v_i.bind( &V.getData()[ ( i + 1 ) * ldSize ], size ); + v_i = (1.0 / normw) * w; + + /**** + * Applying the Givens rotations G_0, ..., G_i + */ + apply_givens_rotations( i, m ); + + this->setResidue( std::fabs( s[ i + 1 ] ) / normb ); + if( ! this->checkNextIteration() ) + return i; + else + this->refreshSolverMonitor(); + } + + return m; +} + template< typename Matrix > int GMRES< Matrix >:: @@ -198,13 +304,13 @@ orthogonalize_MGS( const int m, const RealType normb, const RealType beta ) preconditioned_matvec( w, v_i ); for( int k = 0; k <= i; k++ ) - H[ k + i * ( m + 1 ) ] = 0.0; + H[ k + i * (m + 1) ] = 0.0; const int reorthogonalize = (variant == Variant::MGSR) ? 2 : 1; for( int l = 0; l < reorthogonalize; l++ ) for( int k = 0; k <= i; k++ ) { v_k.bind( &V.getData()[ k * ldSize ], size ); /*** - * H_{k,i} = ( w, v_k ) + * H_{k,i} = (w, v_k) */ RealType H_k_i = (w, v_k); H[ k + i * (m + 1) ] += H_k_i; diff --git a/src/TNL/Solvers/ODE/ExplicitSolver_impl.h b/src/TNL/Solvers/ODE/ExplicitSolver_impl.h index 46e69381c82610d6d69854342a6e90caf8654ad9..eef1a30accbac59433876da2ddfe03aa06c8470c 100644 --- a/src/TNL/Solvers/ODE/ExplicitSolver_impl.h +++ b/src/TNL/Solvers/ODE/ExplicitSolver_impl.h @@ -10,6 +10,8 @@ #pragma once +#include + namespace TNL { namespace Solvers { namespace ODE {