diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 028a646245937f527c16b4e51df4b885bceb898f..fc5b04679a89e0b92e6298497e48387351329a6e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -5,14 +5,12 @@ before_script: - which cmake - which ninja -# Stages are useful only to enforce some ordering of the jobs. Every job is run -# in its own directory and only very few data can be shared between the jobs in -# different stages. It has to be zipped and uploaded to the server, so we can't -# do it with the build directory. Hence, we must build, test and install in the -# same job. stages: - build - - doc_build + - build:cuda + - build:gcc + - build:clang + - build:doc - deploy # default flags for cmake @@ -32,9 +30,45 @@ stages: WITH_TOOLS: "no" WITH_PYTHON: "no" +# base for OpenMP+MPI builds +.openmp_mpi: + tags: + - openmp + - mpi + variables: + WITH_OPENMP: "yes" + WITH_MPI: "yes" + +.cuda_openmp_mpi: + extends: .openmp_mpi + # tags are overridden, not merged... + tags: + - gpu + - openmp + - mpi + +# base for Clang builds +.clang: + stage: build:clang + variables: + CXX: clang++ + CC: clang + CUDA_HOST_COMPILER: clang++ + tags: + - clang + +.clang_mpi: + extends: .clang + # tags are overridden, not merged... + tags: + - clang + - mpi + # template for build jobs -.build_template_def: &build_template - stage: build +.build_template: + stage: build:gcc + # don't wait for jobs in previous stages to complete before starting this job + needs: [] script: # all cores including hyperthreading # - export NUM_CORES=$(grep "core id" /proc/cpuinfo | wc -l) @@ -82,6 +116,7 @@ stages: - Documentation/Tutorials/**/*.{h,hpp,cpp,cu} - "**/CMakeLists.txt" - .gitlab-ci.yml + interruptible: true # Dummy build job to ensure that a pipeline is created for a merge request, even # when there were no changes. @@ -104,63 +139,58 @@ dummy build job: # significantly more time than debug builds). cuda_tests_Debug: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Debug WITH_TESTS: "yes" cuda_tests_Release: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Release WITH_TESTS: "yes" cuda_matrix_tests_Debug: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Debug WITH_MATRIX_TESTS: "yes" cuda_matrix_tests_Release: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Release WITH_MATRIX_TESTS: "yes" cuda_examples_Debug: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Debug WITH_EXAMPLES: "yes" @@ -172,26 +202,24 @@ cuda_examples_Debug: - Documentation/output_snippets/ cuda_examples_Release: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Release WITH_EXAMPLES: "yes" cuda_benchmarks_tools_python_Debug: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Debug WITH_BENCHMARKS: "yes" @@ -199,13 +227,12 @@ cuda_benchmarks_tools_python_Debug: WITH_PYTHON: "yes" cuda_benchmarks_tools_python_Release: - <<: *build_template + extends: .build_template + stage: build:cuda tags: - - openmp - gpu variables: <<: *default_cmake_flags - WITH_OPENMP: "yes" WITH_CUDA: "yes" BUILD_TYPE: Release WITH_BENCHMARKS: "yes" @@ -214,172 +241,93 @@ cuda_benchmarks_tools_python_Release: cuda_mpi_tests_Debug: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_TESTS: "yes" + extends: + - cuda_tests_Debug + - .cuda_openmp_mpi cuda_mpi_tests_Release: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_TESTS: "yes" - + extends: + - cuda_tests_Release + - .cuda_openmp_mpi cuda_mpi_matrix_tests_Debug: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_MATRIX_TESTS: "yes" + extends: + - cuda_matrix_tests_Debug + - .cuda_openmp_mpi cuda_mpi_matrix_tests_Release: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_MATRIX_TESTS: "yes" - + extends: + - cuda_matrix_tests_Release + - .cuda_openmp_mpi cuda_mpi_examples_Debug: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_EXAMPLES: "yes" + extends: + - cuda_examples_Debug + - .cuda_openmp_mpi cuda_mpi_examples_Release: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_EXAMPLES: "yes" - + extends: + - cuda_examples_Release + - .cuda_openmp_mpi cuda_mpi_benchmarks_tools_python_Debug: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_BENCHMARKS: "yes" - WITH_TOOLS: "yes" - WITH_PYTHON: "yes" + extends: + - cuda_benchmarks_tools_python_Debug + - .cuda_openmp_mpi cuda_mpi_benchmarks_tools_python_Release: - <<: *build_template - tags: - - openmp - - gpu - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_CUDA: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_BENCHMARKS: "yes" - WITH_TOOLS: "yes" - WITH_PYTHON: "yes" + extends: + - cuda_benchmarks_tools_python_Release + - .cuda_openmp_mpi default_tests_Debug: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Debug WITH_TESTS: "yes" default_tests_Release: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Release WITH_TESTS: "yes" default_matrix_tests_Debug: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Debug WITH_MATRIX_TESTS: "yes" default_matrix_tests_Release: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Release WITH_MATRIX_TESTS: "yes" default_examples_Debug: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Debug WITH_EXAMPLES: "yes" default_examples_Release: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Release WITH_EXAMPLES: "yes" default_benchmarks_tools_python_Debug: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Debug @@ -388,7 +336,7 @@ default_benchmarks_tools_python_Debug: WITH_PYTHON: "yes" default_benchmarks_tools_python_Release: - <<: *build_template + extends: .build_template variables: <<: *default_cmake_flags BUILD_TYPE: Release @@ -398,118 +346,142 @@ default_benchmarks_tools_python_Release: mpi_tests_Debug: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_TESTS: "yes" + extends: + - default_tests_Debug + - .openmp_mpi mpi_tests_Release: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_TESTS: "yes" + extends: + - default_tests_Release + - .openmp_mpi mpi_matrix_tests_Debug: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_MATRIX_TESTS: "yes" + extends: + - default_matrix_tests_Debug + - .openmp_mpi mpi_matrix_tests_Release: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_MATRIX_TESTS: "yes" + extends: + - default_matrix_tests_Release + - .openmp_mpi mpi_examples_Debug: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_EXAMPLES: "yes" + extends: + - default_examples_Debug + - .openmp_mpi mpi_examples_Release: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_EXAMPLES: "yes" + extends: + - default_examples_Release + - .openmp_mpi mpi_benchmarks_tools_python_Debug: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Debug - WITH_BENCHMARKS: "yes" - WITH_TOOLS: "yes" - WITH_PYTHON: "yes" + extends: + - default_benchmarks_tools_python_Debug + - .openmp_mpi mpi_benchmarks_tools_python_Release: - <<: *build_template - tags: - - openmp - - mpi - variables: - <<: *default_cmake_flags - WITH_OPENMP: "yes" - WITH_MPI: "yes" - BUILD_TYPE: Release - WITH_BENCHMARKS: "yes" - WITH_TOOLS: "yes" - WITH_PYTHON: "yes" + extends: + - default_benchmarks_tools_python_Release + - .openmp_mpi + + +clang_tests_Debug: + extends: + - default_tests_Debug + - .clang + +clang_tests_Release: + extends: + - default_tests_Release + - .clang + +clang_matrix_tests_Debug: + extends: + - default_matrix_tests_Debug + - .clang + +clang_matrix_tests_Release: + extends: + - default_matrix_tests_Release + - .clang + +clang_examples_Debug: + extends: + - default_examples_Debug + - .clang + +clang_examples_Release: + extends: + - default_examples_Release + - .clang + +clang_benchmarks_tools_python_Debug: + extends: + - default_benchmarks_tools_python_Debug + - .clang + +clang_benchmarks_tools_python_Release: + extends: + - default_benchmarks_tools_python_Release + - .clang + + +clang_mpi_tests_Debug: + extends: + - mpi_tests_Debug + - .clang_mpi + +clang_mpi_tests_Release: + extends: + - mpi_tests_Release + - .clang_mpi + +clang_mpi_matrix_tests_Debug: + extends: + - mpi_matrix_tests_Debug + - .clang_mpi + +clang_mpi_matrix_tests_Release: + extends: + - mpi_matrix_tests_Release + - .clang_mpi + +clang_mpi_examples_Debug: + extends: + - mpi_examples_Debug + - .clang_mpi + +clang_mpi_examples_Release: + extends: + - mpi_examples_Release + - .clang_mpi + +clang_mpi_benchmarks_tools_python_Debug: + extends: + - mpi_benchmarks_tools_python_Debug + - .clang_mpi + +clang_mpi_benchmarks_tools_python_Release: + extends: + - mpi_benchmarks_tools_python_Release + - .clang_mpi build documentation: - stage: doc_build + stage: build:doc only: changes: - Documentation/**/* - src/TNL/**/*.{h,hpp} - .gitlab-ci.yml - dependencies: + # use "needs" instead of "dependencies" to allow out-of-order start of this job + needs: # the job which builds Documentation/output_snippets/ - - cuda_examples_Debug + - job: cuda_examples_Debug + artifacts: true script: - ./Documentation/build artifacts: @@ -531,6 +503,7 @@ deploy documentation: - develop - schedules - triggers + # use "dependencies" instead of "needs" to deploy only when the entire pipeline succeeds dependencies: - build documentation script: diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index 7254ba9f4075c81f7100e4c6c86bd16c3b9077a7..5531b360d913b70a24eeead5bbef2c280c044904 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -56,7 +56,9 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif HostView hostView( hostVector ), hostView2( hostVector2 ), hostView3( hostVector3 ), hostView4( hostVector4 ); +#ifdef HAVE_CUDA CudaView deviceView( deviceVector ), deviceView2( deviceVector2 ), deviceView3( deviceVector3 ), deviceView4( deviceVector4 ); +#endif Real resultHost, resultDevice; diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h index 91db24d0187ea588d111d49beb5370c3e27fe24b..617f344791df97243ff968f2fba16ba0043248c5 100644 --- a/src/Benchmarks/SpMV/spmv-legacy.h +++ b/src/Benchmarks/SpMV/spmv-legacy.h @@ -243,7 +243,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, auto resetHostVectors = [&]() { hostInVector = 1.0; - hostOutVector == 0.0; + hostOutVector = 0.0; }; auto spmvCSRHost = [&]() { @@ -279,7 +279,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, auto resetCusparseVectors = [&]() { cusparseInVector = 1.0; - cusparseOutVector == 0.0; + cusparseOutVector = 0.0; }; auto spmvCusparse = [&]() { diff --git a/src/Python/pytnl/tnl/CMakeLists.txt b/src/Python/pytnl/tnl/CMakeLists.txt index de405e5e50c0549e2cf846b06a82d1acb07d9414..ca0c10af02735bbc2947fb1a126a08a7364bc7a3 100644 --- a/src/Python/pytnl/tnl/CMakeLists.txt +++ b/src/Python/pytnl/tnl/CMakeLists.txt @@ -33,4 +33,12 @@ endif() # per-target, so we need to undefine it by passing -U NDEBUG. target_compile_options( pytnl PRIVATE -U NDEBUG -D TNL_THROW_ASSERTION_ERROR ) +# disable errors due to -Wunused-value coming from pybind11 +if( ${WITH_CI_FLAGS} ) + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + target_compile_options( pytnl PRIVATE -Wno-error=unused-value ) + endif() +endif() + + install( TARGETS pytnl DESTINATION ${PYTHON_SITE_PACKAGES_DIR} ) diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp index fc1f2f1e5449a12c56b525c92854705e7bd003e6..74351077ebf85fa5c222c639f5503214cdb1844a 100644 --- a/src/TNL/Algorithms/Scan.hpp +++ b/src/TNL/Algorithms/Scan.hpp @@ -108,8 +108,12 @@ perform( Vector& v, const typename Vector::RealType zero ) { #ifdef HAVE_OPENMP - const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero ); - performSecondPhase( v, blockShifts, begin, end, reduction, zero ); + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() >= 2 ) { + const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero ); + performSecondPhase( v, blockShifts, begin, end, reduction, zero ); + } + else + Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero ); #else Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero ); #endif diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h index ad1faaa8ba8e665fcb81b6b37ecaa594b4df2608..5eececf51032f33bd01a53e01ad0fc26bb7e8cd0 100644 --- a/src/TNL/Matrices/MatrixType.h +++ b/src/TNL/Matrices/MatrixType.h @@ -32,8 +32,8 @@ struct MatrixType type = "General"; else { - if( isSymmetric ) type = "Symmetric"; - if( isBinary ) type += "Binary"; + if( isSymmetric() ) type = "Symmetric"; + if( isBinary() ) type += "Binary"; } return type; } diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 268af8a332dafb066d6061021b193c06dbf5ddeb..9b6bf9fd13d492db330ec0cea3763a3a859d9b94 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -322,7 +322,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * \par Output * \include SparseMatrixExample_getSerializationType.out */ - virtual String getSerializationTypeVirtual() const; + virtual String getSerializationTypeVirtual() const override; /** * \brief Set number of rows and columns of this matrix. @@ -855,21 +855,21 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * * \param fileName is name of the file. */ - void save( File& file ) const; + virtual void save( File& file ) const override; /** * \brief Method for loading the matrix from a file. * * \param fileName is name of the file. */ - void load( File& file ); + virtual void load( File& file ) override; /** * \brief Method for printing the matrix to output stream. * * \param str is the output stream. */ - void print( std::ostream& str ) const; + virtual void print( std::ostream& str ) const override; /** * \brief Returns a padding index value. diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index 2e1d9fbb25a8dfbb1d05b64c186dd855a479924b..02a122a5dd178cb7100edd52210004dccddf2626 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -185,7 +185,7 @@ orthogonalize_CGS( const int m, const RealType normb, const RealType beta ) // initial binding to _M_tmp sets the correct local range, global size and // communication group for distributed views VectorViewType v_i( _M_tmp.getView() ); - VectorViewType v_k( _M_tmp.getView() ); +// VectorViewType v_k( _M_tmp.getView() ); /*** * v_0 = r / | r | = 1.0 / beta * r