diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 028a646245937f527c16b4e51df4b885bceb898f..fc5b04679a89e0b92e6298497e48387351329a6e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -5,14 +5,12 @@ before_script:
     - which cmake
     - which ninja
 
-# Stages are useful only to enforce some ordering of the jobs. Every job is run
-# in its own directory and only very few data can be shared between the jobs in
-# different stages. It has to be zipped and uploaded to the server, so we can't
-# do it with the build directory. Hence, we must build, test and install in the
-# same job.
 stages:
     - build
-    - doc_build
+    - build:cuda
+    - build:gcc
+    - build:clang
+    - build:doc
     - deploy
 
 # default flags for cmake
@@ -32,9 +30,45 @@ stages:
     WITH_TOOLS: "no"
     WITH_PYTHON: "no"
 
+# base for OpenMP+MPI builds
+.openmp_mpi:
+    tags:
+        - openmp
+        - mpi
+    variables:
+        WITH_OPENMP: "yes"
+        WITH_MPI: "yes"
+
+.cuda_openmp_mpi:
+    extends: .openmp_mpi
+    # tags are overridden, not merged...
+    tags:
+        - gpu
+        - openmp
+        - mpi
+
+# base for Clang builds
+.clang:
+    stage: build:clang
+    variables:
+        CXX: clang++
+        CC: clang
+        CUDA_HOST_COMPILER: clang++
+    tags:
+        - clang
+
+.clang_mpi:
+    extends: .clang
+    # tags are overridden, not merged...
+    tags:
+        - clang
+        - mpi
+
 # template for build jobs
-.build_template_def: &build_template
-    stage: build
+.build_template:
+    stage: build:gcc
+    # don't wait for jobs in previous stages to complete before starting this job
+    needs: []
     script:
         # all cores including hyperthreading
 #        - export NUM_CORES=$(grep "core id" /proc/cpuinfo | wc -l)
@@ -82,6 +116,7 @@ stages:
             - Documentation/Tutorials/**/*.{h,hpp,cpp,cu}
             - "**/CMakeLists.txt"
             - .gitlab-ci.yml
+    interruptible: true
 
 # Dummy build job to ensure that a pipeline is created for a merge request, even
 # when there were no changes.
@@ -104,63 +139,58 @@ dummy build job:
 # significantly more time than debug builds).
 
 cuda_tests_Debug:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Debug
         WITH_TESTS: "yes"
 
 cuda_tests_Release:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Release
         WITH_TESTS: "yes"
 
 
 cuda_matrix_tests_Debug:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Debug
         WITH_MATRIX_TESTS: "yes"
 
 cuda_matrix_tests_Release:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Release
         WITH_MATRIX_TESTS: "yes"
 
 
 cuda_examples_Debug:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Debug
         WITH_EXAMPLES: "yes"
@@ -172,26 +202,24 @@ cuda_examples_Debug:
             - Documentation/output_snippets/
 
 cuda_examples_Release:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Release
         WITH_EXAMPLES: "yes"
 
 
 cuda_benchmarks_tools_python_Debug:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Debug
         WITH_BENCHMARKS: "yes"
@@ -199,13 +227,12 @@ cuda_benchmarks_tools_python_Debug:
         WITH_PYTHON: "yes"
 
 cuda_benchmarks_tools_python_Release:
-    <<: *build_template
+    extends: .build_template
+    stage: build:cuda
     tags:
-        - openmp
         - gpu
     variables:
         <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
         BUILD_TYPE: Release
         WITH_BENCHMARKS: "yes"
@@ -214,172 +241,93 @@ cuda_benchmarks_tools_python_Release:
 
 
 cuda_mpi_tests_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_TESTS: "yes"
+    extends:
+        - cuda_tests_Debug
+        - .cuda_openmp_mpi
 
 cuda_mpi_tests_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_TESTS: "yes"
-
+    extends:
+        - cuda_tests_Release
+        - .cuda_openmp_mpi
 
 cuda_mpi_matrix_tests_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_MATRIX_TESTS: "yes"
+    extends:
+        - cuda_matrix_tests_Debug
+        - .cuda_openmp_mpi
 
 cuda_mpi_matrix_tests_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_MATRIX_TESTS: "yes"
-
+    extends:
+        - cuda_matrix_tests_Release
+        - .cuda_openmp_mpi
 
 cuda_mpi_examples_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_EXAMPLES: "yes"
+    extends:
+        - cuda_examples_Debug
+        - .cuda_openmp_mpi
 
 cuda_mpi_examples_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_EXAMPLES: "yes"
-
+    extends:
+        - cuda_examples_Release
+        - .cuda_openmp_mpi
 
 cuda_mpi_benchmarks_tools_python_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_BENCHMARKS: "yes"
-        WITH_TOOLS: "yes"
-        WITH_PYTHON: "yes"
+    extends:
+        - cuda_benchmarks_tools_python_Debug
+        - .cuda_openmp_mpi
 
 cuda_mpi_benchmarks_tools_python_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - gpu
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_CUDA: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_BENCHMARKS: "yes"
-        WITH_TOOLS: "yes"
-        WITH_PYTHON: "yes"
+    extends:
+        - cuda_benchmarks_tools_python_Release
+        - .cuda_openmp_mpi
 
 
 
 
 
 default_tests_Debug:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Debug
         WITH_TESTS: "yes"
 
 default_tests_Release:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Release
         WITH_TESTS: "yes"
 
 default_matrix_tests_Debug:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Debug
         WITH_MATRIX_TESTS: "yes"
 
 default_matrix_tests_Release:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Release
         WITH_MATRIX_TESTS: "yes"
 
 default_examples_Debug:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Debug
         WITH_EXAMPLES: "yes"
 
 default_examples_Release:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Release
         WITH_EXAMPLES: "yes"
 
 default_benchmarks_tools_python_Debug:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Debug
@@ -388,7 +336,7 @@ default_benchmarks_tools_python_Debug:
         WITH_PYTHON: "yes"
 
 default_benchmarks_tools_python_Release:
-    <<: *build_template
+    extends: .build_template
     variables:
         <<: *default_cmake_flags
         BUILD_TYPE: Release
@@ -398,118 +346,142 @@ default_benchmarks_tools_python_Release:
 
 
 mpi_tests_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_TESTS: "yes"
+    extends:
+        - default_tests_Debug
+        - .openmp_mpi
 
 mpi_tests_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_TESTS: "yes"
+    extends:
+        - default_tests_Release
+        - .openmp_mpi
 
 mpi_matrix_tests_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_MATRIX_TESTS: "yes"
+    extends:
+        - default_matrix_tests_Debug
+        - .openmp_mpi
 
 mpi_matrix_tests_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_MATRIX_TESTS: "yes"
+    extends:
+        - default_matrix_tests_Release
+        - .openmp_mpi
 
 mpi_examples_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_EXAMPLES: "yes"
+    extends:
+        - default_examples_Debug
+        - .openmp_mpi
 
 mpi_examples_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_EXAMPLES: "yes"
+    extends:
+        - default_examples_Release
+        - .openmp_mpi
 
 mpi_benchmarks_tools_python_Debug:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Debug
-        WITH_BENCHMARKS: "yes"
-        WITH_TOOLS: "yes"
-        WITH_PYTHON: "yes"
+    extends:
+        - default_benchmarks_tools_python_Debug
+        - .openmp_mpi
 
 mpi_benchmarks_tools_python_Release:
-    <<: *build_template
-    tags:
-        - openmp
-        - mpi
-    variables:
-        <<: *default_cmake_flags
-        WITH_OPENMP: "yes"
-        WITH_MPI: "yes"
-        BUILD_TYPE: Release
-        WITH_BENCHMARKS: "yes"
-        WITH_TOOLS: "yes"
-        WITH_PYTHON: "yes"
+    extends:
+        - default_benchmarks_tools_python_Release
+        - .openmp_mpi
+
+
+clang_tests_Debug:
+    extends:
+        - default_tests_Debug
+        - .clang
+
+clang_tests_Release:
+    extends:
+        - default_tests_Release
+        - .clang
+
+clang_matrix_tests_Debug:
+    extends:
+        - default_matrix_tests_Debug
+        - .clang
+
+clang_matrix_tests_Release:
+    extends:
+        - default_matrix_tests_Release
+        - .clang
+
+clang_examples_Debug:
+    extends:
+        - default_examples_Debug
+        - .clang
+
+clang_examples_Release:
+    extends:
+        - default_examples_Release
+        - .clang
+
+clang_benchmarks_tools_python_Debug:
+    extends:
+        - default_benchmarks_tools_python_Debug
+        - .clang
+
+clang_benchmarks_tools_python_Release:
+    extends:
+        - default_benchmarks_tools_python_Release
+        - .clang
+
+
+clang_mpi_tests_Debug:
+    extends:
+        - mpi_tests_Debug
+        - .clang_mpi
+
+clang_mpi_tests_Release:
+    extends:
+        - mpi_tests_Release
+        - .clang_mpi
+
+clang_mpi_matrix_tests_Debug:
+    extends:
+        - mpi_matrix_tests_Debug
+        - .clang_mpi
+
+clang_mpi_matrix_tests_Release:
+    extends:
+        - mpi_matrix_tests_Release
+        - .clang_mpi
+
+clang_mpi_examples_Debug:
+    extends:
+        - mpi_examples_Debug
+        - .clang_mpi
+
+clang_mpi_examples_Release:
+    extends:
+        - mpi_examples_Release
+        - .clang_mpi
+
+clang_mpi_benchmarks_tools_python_Debug:
+    extends:
+        - mpi_benchmarks_tools_python_Debug
+        - .clang_mpi
+
+clang_mpi_benchmarks_tools_python_Release:
+    extends:
+        - mpi_benchmarks_tools_python_Release
+        - .clang_mpi
 
 
 
 
 build documentation:
-    stage: doc_build
+    stage: build:doc
     only:
         changes:
             - Documentation/**/*
             - src/TNL/**/*.{h,hpp}
             - .gitlab-ci.yml
-    dependencies:
+    # use "needs" instead of "dependencies" to allow out-of-order start of this job
+    needs:
         # the job which builds Documentation/output_snippets/
-        - cuda_examples_Debug
+        - job: cuda_examples_Debug
+          artifacts: true
     script:
         - ./Documentation/build
     artifacts:
@@ -531,6 +503,7 @@ deploy documentation:
             - develop
             - schedules
             - triggers
+    # use "dependencies" instead of "needs" to deploy only when the entire pipeline succeeds
     dependencies:
         - build documentation
     script:
diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h
index 7254ba9f4075c81f7100e4c6c86bd16c3b9077a7..5531b360d913b70a24eeead5bbef2c280c044904 100644
--- a/src/Benchmarks/BLAS/vector-operations.h
+++ b/src/Benchmarks/BLAS/vector-operations.h
@@ -56,7 +56,9 @@ benchmarkVectorOperations( Benchmark & benchmark,
 #endif
 
    HostView hostView( hostVector ), hostView2( hostVector2 ), hostView3( hostVector3 ), hostView4( hostVector4 );
+#ifdef HAVE_CUDA
    CudaView deviceView( deviceVector ), deviceView2( deviceVector2 ), deviceView3( deviceVector3 ), deviceView4( deviceVector4 );
+#endif
 
    Real resultHost, resultDevice;
 
diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h
index 91db24d0187ea588d111d49beb5370c3e27fe24b..617f344791df97243ff968f2fba16ba0043248c5 100644
--- a/src/Benchmarks/SpMV/spmv-legacy.h
+++ b/src/Benchmarks/SpMV/spmv-legacy.h
@@ -243,7 +243,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
 
    auto resetHostVectors = [&]() {
       hostInVector = 1.0;
-      hostOutVector == 0.0;
+      hostOutVector = 0.0;
    };
 
    auto spmvCSRHost = [&]() {
@@ -279,7 +279,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
 
    auto resetCusparseVectors = [&]() {
       cusparseInVector = 1.0;
-      cusparseOutVector == 0.0;
+      cusparseOutVector = 0.0;
    };
 
    auto spmvCusparse = [&]() {
diff --git a/src/Python/pytnl/tnl/CMakeLists.txt b/src/Python/pytnl/tnl/CMakeLists.txt
index de405e5e50c0549e2cf846b06a82d1acb07d9414..ca0c10af02735bbc2947fb1a126a08a7364bc7a3 100644
--- a/src/Python/pytnl/tnl/CMakeLists.txt
+++ b/src/Python/pytnl/tnl/CMakeLists.txt
@@ -33,4 +33,12 @@ endif()
 # per-target, so we need to undefine it by passing -U NDEBUG.
 target_compile_options( pytnl PRIVATE -U NDEBUG -D TNL_THROW_ASSERTION_ERROR )
 
+# disable errors due to -Wunused-value coming from pybind11
+if( ${WITH_CI_FLAGS} )
+   if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+      target_compile_options( pytnl PRIVATE -Wno-error=unused-value )
+   endif()
+endif()
+
+
 install( TARGETS pytnl DESTINATION ${PYTHON_SITE_PACKAGES_DIR} )
diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp
index fc1f2f1e5449a12c56b525c92854705e7bd003e6..74351077ebf85fa5c222c639f5503214cdb1844a 100644
--- a/src/TNL/Algorithms/Scan.hpp
+++ b/src/TNL/Algorithms/Scan.hpp
@@ -108,8 +108,12 @@ perform( Vector& v,
          const typename Vector::RealType zero )
 {
 #ifdef HAVE_OPENMP
-   const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero );
-   performSecondPhase( v, blockShifts, begin, end, reduction, zero );
+   if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() >= 2 ) {
+      const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero );
+      performSecondPhase( v, blockShifts, begin, end, reduction, zero );
+   }
+   else
+      Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero );
 #else
    Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero );
 #endif
diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h
index ad1faaa8ba8e665fcb81b6b37ecaa594b4df2608..5eececf51032f33bd01a53e01ad0fc26bb7e8cd0 100644
--- a/src/TNL/Matrices/MatrixType.h
+++ b/src/TNL/Matrices/MatrixType.h
@@ -32,8 +32,8 @@ struct MatrixType
          type = "General";
       else
       {
-         if( isSymmetric ) type = "Symmetric";
-         if( isBinary ) type += "Binary";
+         if( isSymmetric() ) type = "Symmetric";
+         if( isBinary() ) type += "Binary";
       }
       return type;
    }
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 268af8a332dafb066d6061021b193c06dbf5ddeb..9b6bf9fd13d492db330ec0cea3763a3a859d9b94 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -322,7 +322,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \par Output
        * \include SparseMatrixExample_getSerializationType.out
        */
-      virtual String getSerializationTypeVirtual() const;
+      virtual String getSerializationTypeVirtual() const override;
 
       /**
        * \brief Set number of rows and columns of this matrix.
@@ -855,21 +855,21 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * 
        * \param fileName is name of the file.
        */
-      void save( File& file ) const;
+      virtual void save( File& file ) const override;
 
       /**
        * \brief Method for loading the matrix from a file.
        * 
        * \param fileName is name of the file.
        */
-      void load( File& file );
+      virtual void load( File& file ) override;
 
       /**
        * \brief Method for printing the matrix to output stream.
        * 
        * \param str is the output stream.
        */
-      void print( std::ostream& str ) const;
+      virtual void print( std::ostream& str ) const override;
 
       /**
        * \brief Returns a padding index value.
diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h
index 2e1d9fbb25a8dfbb1d05b64c186dd855a479924b..02a122a5dd178cb7100edd52210004dccddf2626 100644
--- a/src/TNL/Solvers/Linear/GMRES_impl.h
+++ b/src/TNL/Solvers/Linear/GMRES_impl.h
@@ -185,7 +185,7 @@ orthogonalize_CGS( const int m, const RealType normb, const RealType beta )
    // initial binding to _M_tmp sets the correct local range, global size and
    // communication group for distributed views
    VectorViewType v_i( _M_tmp.getView() );
-   VectorViewType v_k( _M_tmp.getView() );
+//   VectorViewType v_k( _M_tmp.getView() );
 
    /***
     * v_0 = r / | r | =  1.0 / beta * r