From e7880461cfd64752fa8136c5e48ed6cc60bda30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 17 Aug 2019 21:24:48 +0200 Subject: [PATCH 01/35] Removed MIC support --- .gitlab-ci.yml | 2 - CMakeLists.txt | 18 - build | 3 - .../HeatEquation/Tuning/GridTraverser_impl.h | 2 - src/Benchmarks/ODESolvers/Euler.hpp | 22 - src/Examples/heat-equation/CMakeLists.txt | 13 +- src/TNL/Allocators/Default.h | 10 - src/TNL/Allocators/MIC.h | 100 ---- src/TNL/Assert.h | 2 +- .../Containers/Algorithms/ArrayOperations.h | 91 ---- .../Algorithms/ArrayOperationsMIC.hpp | 429 ------------------ src/TNL/Devices/CudaCallable.h | 6 +- src/TNL/Devices/MIC.h | 170 ------- src/TNL/Exceptions/MICBadAlloc.h | 31 -- src/TNL/Exceptions/MICSupportMissing.h | 30 -- src/TNL/File.h | 17 - src/TNL/File.hpp | 85 ---- src/TNL/Math.h | 54 +-- src/TNL/Matrices/CSR_impl.h | 32 -- src/TNL/Matrices/SlicedEllpack_impl.h | 63 +-- src/TNL/Meshes/GridDetails/GridTraverser.h | 113 ----- .../Meshes/GridDetails/GridTraverser_1D.hpp | 67 +-- .../Meshes/GridDetails/GridTraverser_2D.hpp | 102 +---- .../Meshes/GridDetails/GridTraverser_3D.hpp | 66 +-- src/TNL/Object.h | 2 - src/TNL/Pointers/DevicePointer.h | 282 +----------- src/TNL/Pointers/SharedPointer.h | 3 +- src/TNL/Pointers/SharedPointerMic.h | 373 --------------- src/TNL/Pointers/UniquePointer.h | 181 +------- src/TNL/Solvers/BuildConfigTags.h | 4 - src/TNL/Solvers/Linear/Preconditioners/ILU0.h | 23 - src/TNL/Solvers/Linear/Preconditioners/ILUT.h | 23 - src/TNL/Solvers/ODE/Euler.h | 2 - src/TNL/Solvers/ODE/Euler.hpp | 7 +- src/TNL/Solvers/SolverConfig_impl.h | 6 - src/TNL/Solvers/SolverInitiator_impl.h | 3 - src/UnitTests/Containers/ArrayTest.h | 34 -- src/UnitTests/Containers/ArrayViewTest.h | 34 -- src/UnitTests/Containers/VectorTestSetup.h | 15 - 39 files changed, 41 insertions(+), 2479 deletions(-) delete mode 100644 src/TNL/Allocators/MIC.h delete mode 100644 src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp delete mode 
100644 src/TNL/Devices/MIC.h delete mode 100644 src/TNL/Exceptions/MICBadAlloc.h delete mode 100644 src/TNL/Exceptions/MICSupportMissing.h delete mode 100644 src/TNL/Pointers/SharedPointerMic.h diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d260486c8..a26124c8a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -21,7 +21,6 @@ stages: WITH_OPENMP: "no" WITH_CUDA: "no" WITH_CUDA_ARCH: "auto" - WITH_MIC: "no" WITH_MPI: "no" # configurations WITH_TESTS: "no" @@ -56,7 +55,6 @@ stages: -DWITH_MPI=${WITH_MPI} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} - -DWITH_MIC=${WITH_MIC} -DWITH_TESTS=${WITH_TESTS} -DWITH_DOC=${WITH_DOC} -DWITH_COVERAGE=${WITH_COVERAGE} diff --git a/CMakeLists.txt b/CMakeLists.txt index 9540fe002..4d7e0cedf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,6 @@ set( tnlVersion "0.1" ) # declare all custom build options option(OFFLINE_BUILD "Offline build (i.e. without downloading libraries such as pybind11)" OFF) -option(WITH_MIC "Build with MIC support" OFF) option(WITH_CUDA "Build with CUDA support" ON) set(WITH_CUDA_ARCH "auto" CACHE STRING "Build for these CUDA architectures") option(WITH_OPENMP "Build with OpenMP support" ON) @@ -120,22 +119,6 @@ if( NOT DEFINED ENV{CI_JOB_NAME} ) endif() endif() -if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570") - ##### - # Check for MIC - # - if( ${WITH_MIC} ) - message( "Enabled MIC support." 
) - set( MIC_CXX_FLAGS "-DHAVE_MIC") - # build all tests with MIC support - set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_MIC ) - set( WITH_CUDA OFF CACHE BOOL "Build with CUDA support" ) - else() - set( MIC_CXX_FLAGS "") - endif() -endif() - # force colorized output in continuous integration if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" ) message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.") @@ -355,7 +338,6 @@ INCLUDE( CPack ) # Print custom build options message( "-- Build options:" ) message( " OFFLINE_BUILD = ${OFFLINE_BUILD}" ) -message( " WITH_MIC = ${WITH_MIC}" ) message( " WITH_CUDA = ${WITH_CUDA}" ) message( " WITH_CUDA_ARCH = ${WITH_CUDA_ARCH}" ) message( " WITH_OPENMP = ${WITH_OPENMP}" ) diff --git a/build b/build index c1e0d3162..bcd590860 100755 --- a/build +++ b/build @@ -48,7 +48,6 @@ do --offline-build ) OFFLINE_BUILD="yes" ;; --with-clang=* ) WITH_CLANG="${option#*=}" ;; --with-mpi=* ) WITH_MPI="${option#*=}" ;; - --with-mic=* ) WITH_MIC="${option#*=}" ;; --with-cuda=* ) WITH_CUDA="${option#*=}" ;; --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";; --with-openmp=* ) WITH_OPENMP="${option#*=}" ;; @@ -78,7 +77,6 @@ if [[ ${HELP} == "yes" ]]; then echo " --install=yes/no Enables the installation of TNL files." echo " --offline-build=yes/no Disables online updates during the build. 'no' by default." echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)." - echo " --with-mic=yes/no Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)." echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default." 
@@ -126,7 +124,6 @@ cmake_command=( -DCMAKE_BUILD_TYPE=${BUILD} -DCMAKE_INSTALL_PREFIX=${PREFIX} -DOFFLINE_BUILD=${OFFLINE_BUILD} - -DWITH_MIC=${WITH_MIC} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} -DWITH_OPENMP=${WITH_OPENMP} diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h index f3d9fbeec..816ee5e2c 100644 --- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h +++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h @@ -8,8 +8,6 @@ /* See Copyright Notice in tnl/Copyright */ -#include - #pragma once #include "GridTraverser.h" diff --git a/src/Benchmarks/ODESolvers/Euler.hpp b/src/Benchmarks/ODESolvers/Euler.hpp index 1066e178c..efb336aca 100644 --- a/src/Benchmarks/ODESolvers/Euler.hpp +++ b/src/Benchmarks/ODESolvers/Euler.hpp @@ -10,7 +10,6 @@ #pragma once -#include #include #include #include "ComputeBlockResidue.h" @@ -209,28 +208,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u, } #endif } - - //MIC - if( std::is_same< DeviceType, Devices::MIC >::value ) - { -#ifdef HAVE_MIC - Devices::MICHider mu; - mu.pointer=_u; - Devices::MICHider mk1; - mk1.pointer=_k1; - #pragma offload target(mic) in(mu,mk1,size) inout(localResidue) - { - #pragma omp parallel for reduction(+:localResidue) firstprivate( mu, mk1 ) - for( IndexType i = 0; i < size; i ++ ) - { - const RealType add = tau * mk1.pointer[ i ]; - mu.pointer[ i ] += add; - localResidue += std::fabs( add ); - } - } -#endif - } localResidue /= tau * ( RealType ) size; Problem::CommunicatorType::Allreduce( &localResidue, ¤tResidue, 1, MPI_SUM, Problem::CommunicatorType::AllGroup ); //std::cerr << "Local residue = " << localResidue << " - globalResidue = " << currentResidue << std::endl; diff --git a/src/Examples/heat-equation/CMakeLists.txt b/src/Examples/heat-equation/CMakeLists.txt index c89519906..979c34076 100644 --- a/src/Examples/heat-equation/CMakeLists.txt +++ 
b/src/Examples/heat-equation/CMakeLists.txt @@ -1,18 +1,17 @@ -set( tnl_heat_equation_SOURCES +set( tnl_heat_equation_SOURCES tnl-heat-equation.cpp tnl-heat-equation-eoc.cpp tnl-heat-equation.cu tnl-heat-equation-eoc.cu ) - + IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cu) CUDA_ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cu) target_link_libraries (tnl-heat-equation ${CUSPARSE_LIBRARY} ) target_link_libraries (tnl-heat-equation-eoc-test ${CUSPARSE_LIBRARY} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp) - ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp) - TARGET_COMPILE_DEFINITIONS( tnl-heat-equation PUBLIC ${MIC_CXX_FLAGS} ) +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp) + ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp) ENDIF( BUILD_CUDA ) @@ -20,7 +19,7 @@ INSTALL( TARGETS tnl-heat-equation tnl-heat-equation-eoc-test RUNTIME DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - + INSTALL( FILES tnl-run-heat-equation-eoc-test tnl-run-heat-equation ${tnl_heat_equation_SOURCES} diff --git a/src/TNL/Allocators/Default.h b/src/TNL/Allocators/Default.h index 6906a905c..eed5c193b 100644 --- a/src/TNL/Allocators/Default.h +++ b/src/TNL/Allocators/Default.h @@ -14,10 +14,8 @@ #include #include -#include #include #include -#include namespace TNL { namespace Allocators { @@ -45,13 +43,5 @@ struct Default< Devices::Cuda > using Allocator = Allocators::Cuda< T >; }; -//! Sets \ref Allocators::MIC as the default allocator for \ref Devices::MIC. 
-template<> -struct Default< Devices::MIC > -{ - template< typename T > - using Allocator = Allocators::MIC< T >; -}; - } // namespace Allocators } // namespace TNL diff --git a/src/TNL/Allocators/MIC.h b/src/TNL/Allocators/MIC.h deleted file mode 100644 index c3599f449..000000000 --- a/src/TNL/Allocators/MIC.h +++ /dev/null @@ -1,100 +0,0 @@ -/*************************************************************************** - MIC.h - description - ------------------- - begin : Jul 2, 2019 - copyright : (C) 2019 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -#include -#include - -namespace TNL { -namespace Allocators { - -/** - * \brief Allocator for the MIC device memory space. - */ -template< class T > -struct MIC -{ - using value_type = T; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - MIC() = default; - MIC( const MIC& ) = default; - MIC( MIC&& ) = default; - - MIC& operator=( const MIC& ) = default; - MIC& operator=( MIC&& ) = default; - - template< class U > - MIC( const MIC< U >& ) - {} - - template< class U > - MIC( MIC< U >&& ) - {} - - template< class U > - MIC& operator=( const MIC< U >& ) - { - return *this; - } - - template< class U > - MIC& operator=( MIC< U >&& ) - { - return *this; - } - - value_type* allocate( size_type size ) - { -#ifdef HAVE_MIC - Devices::MICHider hide_ptr; - #pragma offload target(mic) out(hide_ptr) in(size) - { - hide_ptr.pointer = malloc(size * sizeof(value_type)); - } - return hide_ptr.pointer; -#else - throw Exceptions::MICSupportMissing(); -#endif - } - - void deallocate(value_type* ptr, size_type) - { -#ifdef HAVE_MIC - Devices::MICHider hide_ptr; - hide_ptr.pointer=ptr; - #pragma offload target(mic) in(hide_ptr) - { - free(hide_ptr.pointer); - } -#else - throw Exceptions::MICSupportMissing(); 
-#endif - } -}; - -template -bool operator==(const MIC&, const MIC&) -{ - return true; -} - -template -bool operator!=(const MIC& lhs, const MIC& rhs) -{ - return !(lhs == rhs); -} - -} // namespace Allocators -} // namespace TNL diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h index 27f3b11b2..3d91c8c76 100644 --- a/src/TNL/Assert.h +++ b/src/TNL/Assert.h @@ -38,7 +38,7 @@ #define TNL_NVCC_HD_WARNING_DISABLE #endif -#if defined(NDEBUG) || defined(HAVE_MIC) +#ifdef NDEBUG // empty macros for optimized build /** diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Containers/Algorithms/ArrayOperations.h index ca62f5b7e..d4c35f5b1 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperations.h +++ b/src/TNL/Containers/Algorithms/ArrayOperations.h @@ -12,7 +12,6 @@ #include #include -#include namespace TNL { namespace Containers { @@ -194,95 +193,6 @@ struct ArrayOperations< Devices::Host, Devices::Cuda > const Index size ); }; - -template<> -struct ArrayOperations< Devices::MIC > -{ - template< typename Element > - static void setElement( Element* data, - const Element& value ); - - template< typename Element > - static Element getElement( const Element* data ); - - template< typename Element, typename Index > - static void set( Element* data, - const Element& value, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename Index, - typename SourceIterator > - static void copyFromIterator( DestinationElement* destination, - Index destinationSize, - SourceIterator first, - SourceIterator last ); - - template< typename Element1, - typename Element2, - typename Index > - static bool compare( const Element1* destination, - const Element2* source, - const Index size ); - - template< typename Element, - typename Index > - static bool containsValue( 
const Element* data, - const Index size, - const Element& value ); - - template< typename Element, - typename Index > - static bool containsOnlyValue( const Element* data, - const Index size, - const Element& value ); -}; - -template<> -struct ArrayOperations< Devices::MIC, Devices::Host > -{ - public: - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static bool compare( const DestinationElement* destination, - const SourceElement* source, - const Index size ); -}; - -template<> -struct ArrayOperations< Devices::Host, Devices::MIC > -{ - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static bool compare( const DestinationElement* destination, - const SourceElement* source, - const Index size ); -}; - } // namespace Algorithms } // namespace Containers } // namespace TNL @@ -290,4 +200,3 @@ struct ArrayOperations< Devices::Host, Devices::MIC > #include #include #include -#include diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp deleted file mode 100644 index 4113bbcd9..000000000 --- a/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp +++ /dev/null @@ -1,429 +0,0 @@ -/*************************************************************************** - ArrayOperationsMIC_impl.h - description - ------------------- - begin : Mar 4, 2017 - copyright : (C) 2017 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - 
-// Implemented by: Vit Hanousek - -#pragma once - -#include - -#include -#include -#include -#include -#include - -namespace TNL { -namespace Containers { -namespace Algorithms { - -static constexpr std::size_t MIC_STACK_VAR_LIM = 5*1024*1024; - -template< typename Element > -void -ArrayOperations< Devices::MIC >:: -setElement( Element* data, - const Element& value ) -{ - TNL_ASSERT( data, ); - ArrayOperations< Devices::MIC >::set( data, value, 1 ); -} - -template< typename Element > -Element -ArrayOperations< Devices::MIC >:: -getElement( const Element* data ) -{ - TNL_ASSERT( data, ); - Element result; - ArrayOperations< Devices::Host, Devices::MIC >::copy< Element, Element, int >( &result, data, 1 ); - return result; -} - -template< typename Element, typename Index > -void -ArrayOperations< Devices::MIC >:: -set( Element* data, - const Element& value, - const Index size ) -{ - TNL_ASSERT( data, ); -#ifdef HAVE_MIC - Element tmp=value; - Devices::MICHider hide_ptr; - hide_ptr.pointer=data; - #pragma offload target(mic) in(hide_ptr,tmp,size) - { - Element * dst= hide_ptr.pointer; - for(int i=0;i -void -ArrayOperations< Devices::MIC >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); - #ifdef HAVE_MIC - if( std::is_same< DestinationElement, SourceElement >::value ) - { - Devices::MICHider src_ptr; - src_ptr.pointer=(void*)source; - Devices::MICHider dst_ptr; - dst_ptr.pointer=(void*)destination; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) - { - memcpy(dst_ptr.pointer,src_ptr.pointer,size*sizeof(DestinationElement)); - } - } - else - { - Devices::MICHider src_ptr; - src_ptr.pointer=source; - Devices::MICHider dst_ptr; - dst_ptr.pointer=destination; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) - { - for(int i=0;i -void -ArrayOperations< Devices::MIC >:: -copyFromIterator( DestinationElement* destination, - Index destinationSize, - SourceIterator 
first, - SourceIterator last ) -{ - throw Exceptions::NotImplementedError(); -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::MIC >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); -#ifdef HAVE_MIC - if( std::is_same< Element1, Element2 >::value ) - { - Devices::MICHider src_ptr; - src_ptr.pointer=(void*)source; - Devices::MICHider dst_ptr; - dst_ptr.pointer=(void*)destination; - int ret=0; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret) - { - ret=memcmp(dst_ptr.pointer,src_ptr.pointer,size*sizeof(Element1)); - } - if(ret==0) - return true; - } - else - { - Devices::MICHider src_ptr; - src_ptr.pointer=source; - Devices::MICHider dst_ptr; - dst_ptr.pointer=destination; - bool ret=false; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret) - { - int i=0; - for(i=0;i -bool -ArrayOperations< Devices::MIC >:: -containsValue( const Element* data, - const Index size, - const Element& value ) -{ - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); - TNL_ASSERT_GE( size, 0, "" ); -#ifdef HAVE_MIC - throw Exceptions::NotImplementedError(); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::MIC >:: -containsOnlyValue( const Element* data, - const Index size, - const Element& value ) -{ - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." 
); - TNL_ASSERT_GE( size, 0, "" ); -#ifdef HAVE_MIC - throw Exceptions::NotImplementedError(); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - - - -/**** - * Operations MIC -> Host - */ - -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Host, Devices::MIC >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); -#ifdef HAVE_MIC - if( std::is_same< DestinationElement, SourceElement >::value ) - { - Devices::MICHider src_ptr; - src_ptr.pointer=(void*)source; - - //JAKA KONSTANTA se vejde do stacku 5MB? - if(size src_ptr; - src_ptr.pointer=source; - - if(size -bool -ArrayOperations< Devices::Host, Devices::MIC >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - /*** - * Here, destination is on host and source is on MIC device. - */ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); - TNL_ASSERT( size >= 0, std::cerr << "size = " << size ); -#ifdef HAVE_MIC - Index compared( 0 ); - Index transfer( 0 ); - std::size_t max_transfer=MIC_STACK_VAR_LIM/sizeof(Element2); - uint8_t host_buffer[max_transfer*sizeof(Element2)]; - - Devices::MICHider src_ptr; - - while( compared < size ) - { - transfer=min(size-compared,max_transfer); - src_ptr.pointer=source+compared; - #pragma offload target(mic) out(host_buffer) in(src_ptr,transfer) - { - memcpy((void*)&host_buffer,(void*)src_ptr.pointer,transfer*sizeof(Element2)); - } - if( ! 
ArrayOperations< Devices::Host >::compare( &destination[ compared ], (Element2*)&host_buffer, transfer ) ) - { - return false; - } - compared += transfer; - } - return true; -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -/**** - * Operations Host -> MIC - */ -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::MIC, Devices::Host >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); - TNL_ASSERT( size >= 0, std::cerr << "size = " << size ); -#ifdef HAVE_MIC - if( std::is_same< DestinationElement, SourceElement >::value ) - { - Devices::MICHider dst_ptr; - dst_ptr.pointer=(void*)destination; - - //JAKA KONSTANTA se vejde do stacku 5MB? - if(size dst_ptr; - dst_ptr.pointer=destination; - - if(size -bool -ArrayOperations< Devices::MIC, Devices::Host >:: -compare( const Element1* hostData, - const Element2* deviceData, - const Index size ) -{ - TNL_ASSERT( hostData, ); - TNL_ASSERT( deviceData, ); - TNL_ASSERT( size >= 0, std::cerr << "size = " << size ); - return ArrayOperations< Devices::Host, Devices::MIC >::compare( deviceData, hostData, size ); -} - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Devices/CudaCallable.h b/src/TNL/Devices/CudaCallable.h index f9311443f..f63e4e430 100644 --- a/src/TNL/Devices/CudaCallable.h +++ b/src/TNL/Devices/CudaCallable.h @@ -20,11 +20,9 @@ * This macro serves for definition of function which are supposed to be called * even from device. If HAVE_CUDA is defined, the __cuda_callable__ function * is compiled for both CPU and GPU. If HAVE_CUDA is not defined, this macro has - * no effect. Support for Intel Xeon Phi is now in "hibernated" state. + * no effect. 
*/ -#ifdef HAVE_MIC - #define __cuda_callable__ __attribute__((target(mic))) -#elif HAVE_CUDA +#ifdef HAVE_CUDA #define __cuda_callable__ __device__ __host__ #else #define __cuda_callable__ diff --git a/src/TNL/Devices/MIC.h b/src/TNL/Devices/MIC.h deleted file mode 100644 index f347a24d1..000000000 --- a/src/TNL/Devices/MIC.h +++ /dev/null @@ -1,170 +0,0 @@ -/*************************************************************************** - MIC.h - description - ------------------- - begin : Nov 7, 2016 - copyright : (C) 2016 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Vit Hanousek - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include - - -namespace TNL { -namespace Devices { -namespace { - -//useful macros from Intel's tutorials -- but we do not use it, becaouse it is tricky (system of maping variables CPU-MIC) -#define ALLOC alloc_if(1) //alloac variable at begining of offloaded block -- default -#define FREE free_if(1) // delete variable at the end of offloaded block -- default -#define RETAIN free_if(0) //do not delete variable at the end of offladed block -#define REUSE alloc_if(0) //do not alloc variable at begin of offloaded block, reuse variable on MIC which was not deleted befeore - -//structure which hides pointer - bypass mapping of variables and addresses of arrays and allow get RAW addres of MIC memory to RAM -template< typename Type > -struct MICHider{ - Type *pointer; -}; - -//inflatable structure -- structures can be copied to MIC - classes not (viz paper published after CSJP 2016 in Krakow) -//object can be copied in side this structure and then copied into MIC memory -template -struct MICStruct{ - uint8_t data[VELIKOST]; -}; - -//Macros which can make code better readeble --but they are tricky, creating variables with specific names... 
-//version using inflatable structure -#define TNLMICSTRUCT(bb,typ) Devices::MICStruct s ## bb; \ - memcpy((void*)& s ## bb,(void*)& bb,sizeof(typ)); -#define TNLMICSTRUCTOFF(bb,typ) s ## bb -#define TNLMICSTRUCTUSE(bb,typ) typ * kernel ## bb = (typ*) &s ## bb; -#define TNLMICSTRUCTALLOC(bb,typ) typ * kernel ## bb = (typ*) malloc (sizeof(typ)); \ - memcpy((void*)kernel ## bb,(void*) & s ## bb, sizeof(typ)); - -//version which retypes pointer of object to pointer to array of uint8_t, -//object can be copied using uint8_t pointer as array with same length as object size -#define TNLMICHIDE(bb,typ) uint8_t * u ## bb=(uint8_t *)&bb; \ - MICHider kernel ## bb; -#define TNLMICHIDEALLOCOFF(bb,typ) in(u ## bb:length(sizeof(typ))) out(kernel ## bb) -#define TNLMICHIDEALLOC(bb,typ) kernel ## bb.pointer=(typ*)malloc(sizeof(typ)); \ - memcpy((void*)kernel ## bb.pointer,(void*)u ## bb,sizeof(typ)); -#define TNLMICHIDEFREEOFF(bb,typ) in(kernel ## bb) -#define TNLMICHIDEFREE(bb,typ) free((void*)kernel ## bb.pointer - -class MIC -{ - public: - - static String getDeviceType() - { - return String( "Devices::MIC" ); - }; - - // TODO: Remove getDeviceType(); - static inline String getType() { return getDeviceType(); }; - -#ifdef HAVE_MIC - - //useful debuging -- but produce warning - __cuda_callable__ static inline void CheckMIC(void) - { - #ifdef __MIC__ - std::cout<<"ON MIC"< - static - TYP * passToDevice(TYP &objektCPU) - { - uint8_t * uk=(uint8_t *)&objektCPU; - MICHider ret; - - #pragma offload target(mic) in(uk:length(sizeof(TYP))) out(ret) - { - ret.pointer=(TYP*)malloc(sizeof(TYP)); - std::memcpy((void*)ret.pointer,(void*)uk,sizeof(TYP)); - } - return ret.pointer; - - std::cout << "Někdo mně volá :-D" < - static - void freeFromDevice(TYP *objektMIC) - { - MICHider ptr; - ptr.pointer=objektMIC; - #pragma offload target(mic) in(ptr) - { - free((void*)ptr.pointer); - } - }; - - static inline - void CopyToMIC(void* mic_ptr,void* ptr,size_t size) - { - uint8_t image[size]; - 
std::memcpy((void*)&image,ptr,size); - Devices::MICHider hide_ptr; - hide_ptr.pointer=mic_ptr; - #pragma offload target(mic) in(hide_ptr) in(image) in(size) - { - std::memcpy((void*)hide_ptr.pointer,(void*)&image,size); - } - }; - -#endif - - static void insertSmartPointer( Pointers::SmartPointer* pointer ) - { - smartPointersRegister.insert( pointer, -1 ); - } - - static void removeSmartPointer( Pointers::SmartPointer* pointer ) - { - smartPointersRegister.remove( pointer, -1 ); - } - - // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be - // called to get the device ID. - static bool synchronizeDevice( int deviceId = -1 ) - { - smartPointersSynchronizationTimer.start(); - bool b = smartPointersRegister.synchronizeDevice( deviceId ); - smartPointersSynchronizationTimer.stop(); - return b; - } - - static Timer smartPointersSynchronizationTimer; - -protected: - static Pointers::SmartPointersRegister smartPointersRegister; -}; - -Pointers::SmartPointersRegister MIC::smartPointersRegister; -Timer MIC::smartPointersSynchronizationTimer; - -} // namespace -} // namespace Devices -} // namespace TNL diff --git a/src/TNL/Exceptions/MICBadAlloc.h b/src/TNL/Exceptions/MICBadAlloc.h deleted file mode 100644 index b8f3a9157..000000000 --- a/src/TNL/Exceptions/MICBadAlloc.h +++ /dev/null @@ -1,31 +0,0 @@ -/*************************************************************************** - MICBadAlloc.h - description - ------------------- - begin : Jul 31, 2017 - copyright : (C) 2017 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -#include - -namespace TNL { -namespace Exceptions { - -struct MICBadAlloc - : public std::bad_alloc -{ - const char* what() const throw() - { - return "Failed to allocate memory on the MIC device: " - "most likely there is not enough space on the device memory."; - } -}; - -} // namespace Exceptions -} // namespace TNL diff --git a/src/TNL/Exceptions/MICSupportMissing.h b/src/TNL/Exceptions/MICSupportMissing.h deleted file mode 100644 index 6d4260e6a..000000000 --- a/src/TNL/Exceptions/MICSupportMissing.h +++ /dev/null @@ -1,30 +0,0 @@ -/*************************************************************************** - MICSupportMissing.h - description - ------------------- - begin : Jul 31, 2017 - copyright : (C) 2017 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -#include - -namespace TNL { -namespace Exceptions { - -struct MICSupportMissing - : public std::runtime_error -{ - MICSupportMissing() - : std::runtime_error( "MIC support is missing, but the program called a function which needs it. " - "Please recompile the program with MIC support." 
) - {} -}; - -} // namespace Exceptions -} // namespace TNL diff --git a/src/TNL/File.h b/src/TNL/File.h index 1aa5615e5..70eb013b7 100644 --- a/src/TNL/File.h +++ b/src/TNL/File.h @@ -16,7 +16,6 @@ #include #include #include -#include namespace TNL { @@ -154,14 +153,6 @@ class File typename = void > void load_impl( Type* buffer, std::streamsize elements ); - template< typename Type, - typename SourceType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, - typename = void, - typename = void > - void load_impl( Type* buffer, std::streamsize elements ); - template< typename Type, typename TargetType, typename Device, @@ -175,14 +166,6 @@ class File typename = void > void save_impl( const Type* buffer, std::streamsize elements ); - template< typename Type, - typename TargetType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, - typename = void, - typename = void > - void save_impl( const Type* buffer, std::streamsize elements ); - std::fstream file; String fileName; diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index f4edd2b96..19a9eaa06 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -168,48 +167,6 @@ void File::load_impl( Type* buffer, std::streamsize elements ) #endif } -// MIC -template< typename Type, - typename SourceType, - typename Device, - typename, typename, typename > -void File::load_impl( Type* buffer, std::streamsize elements ) -{ -#ifdef HAVE_MIC - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); - using BaseType = typename std::remove_cv< Type >::type; - std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; - - std::streamsize readElements = 0; - if( std::is_same< Type, SourceType >::value ) - { - while( readElements < elements ) - { - const 
std::streamsize transfer = std::min( elements - readElements, host_buffer_size ); - file.read( reinterpret_cast(host_buffer.get()), sizeof(Type) * transfer ); - - Devices::MICHider device_buff; - device_buff.pointer=buffer; - #pragma offload target(mic) in(device_buff,readElements) in(host_buffer:length(transfer)) - { - /* - for(int i=0;i @@ -303,48 +260,6 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) #endif } -// MIC -template< typename Type, - typename TargetType, - typename Device, - typename, typename, typename > -void File::save_impl( const Type* buffer, std::streamsize elements ) -{ -#ifdef HAVE_MIC - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); - using BaseType = typename std::remove_cv< Type >::type; - std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; - - std::streamsize writtenElements = 0; - if( std::is_same< Type, TargetType >::value ) - { - while( this->writtenElements < elements ) - { - const std::streamsize transfer = std::min( elements - writtenElements, host_buffer_size ); - - Devices::MICHider device_buff; - device_buff.pointer=buffer; - #pragma offload target(mic) in(device_buff,writtenElements) out(host_buffer:length(transfer)) - { - //THIS SHOULD WORK... BUT NOT WHY? - /*for(int i=0;i(host_buffer.get()), sizeof(Type) * transfer ); - writtenElements += transfer; - } - } - else - throw Exceptions::NotImplementedError("Type conversion during saving is not implemented for MIC."); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - inline bool fileExists( const String& fileName ) { std::fstream file; diff --git a/src/TNL/Math.h b/src/TNL/Math.h index cd73b020e..b7591bf65 100644 --- a/src/TNL/Math.h +++ b/src/TNL/Math.h @@ -30,7 +30,7 @@ ResultType sum( const T1& a, const T2& b ) * \brief This function returns minimum of two numbers. 
* * GPU device code uses the functions defined in the CUDA's math_functions.h, - * MIC uses trivial override and host uses the STL functions. + * host uses the STL functions. */ template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type, // enable_if is necessary to avoid ambiguity in vector expressions @@ -44,8 +44,6 @@ ResultType min( const T1& a, const T2& b ) #else #if defined(__CUDA_ARCH__) return ::min( (ResultType) a, (ResultType) b ); - #elif defined(__MIC__) - return a < b ? a : b; #else return std::min( (ResultType) a, (ResultType) b ); #endif @@ -57,7 +55,7 @@ ResultType min( const T1& a, const T2& b ) * \brief This function returns maximum of two numbers. * * GPU device code uses the functions defined in the CUDA's math_functions.h, - * MIC uses trivial override and host uses the STL functions. + * host uses the STL functions. */ template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type, // enable_if is necessary to avoid ambiguity in vector expressions @@ -71,8 +69,6 @@ ResultType max( const T1& a, const T2& b ) #else #if defined(__CUDA_ARCH__) return ::max( (ResultType) a, (ResultType) b ); - #elif defined(__MIC__) - return a > b ? 
a : b; #else return std::max( (ResultType) a, (ResultType) b ); #endif @@ -92,10 +88,6 @@ T abs( const T& n ) return ::abs( n ); else return ::fabs( n ); -#elif defined(__MIC__) - if( n < ( T ) 0 ) - return -n; - return n; #else return std::abs( n ); #endif @@ -159,7 +151,7 @@ template< typename T1, typename T2, typename ResultType = typename std::common_t __cuda_callable__ inline ResultType pow( const T1& base, const T2& exp ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::pow( (ResultType) base, (ResultType) exp ); #else return std::pow( (ResultType) base, (ResultType) exp ); @@ -173,7 +165,7 @@ template< typename T > __cuda_callable__ inline auto exp( const T& value ) -> decltype( std::exp(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::exp( value ); #else return std::exp( value ); @@ -187,7 +179,7 @@ template< typename T > __cuda_callable__ inline auto sqrt( const T& value ) -> decltype( std::sqrt(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::sqrt( value ); #else return std::sqrt( value ); @@ -201,7 +193,7 @@ template< typename T > __cuda_callable__ inline auto cbrt( const T& value ) -> decltype( std::cbrt(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::cbrt( value ); #else return std::cbrt( value ); @@ -215,7 +207,7 @@ template< typename T > __cuda_callable__ inline auto log( const T& value ) -> decltype( std::log(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::log( value ); #else return std::log( value ); @@ -229,7 +221,7 @@ template< typename T > __cuda_callable__ inline auto log10( const T& value ) -> decltype( std::log10(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::log10( value ); #else return std::log10( value ); @@ -243,7 +235,7 @@ template< typename T > __cuda_callable__ 
inline auto log2( const T& value ) -> decltype( std::log2(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::log2( value ); #else return std::log2( value ); @@ -257,7 +249,7 @@ template< typename T > __cuda_callable__ inline auto sin( const T& value ) -> decltype( std::sin(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::sin( value ); #else return std::sin( value ); @@ -271,7 +263,7 @@ template< typename T > __cuda_callable__ inline auto cos( const T& value ) -> decltype( std::cos(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::cos( value ); #else return std::cos( value ); @@ -285,7 +277,7 @@ template< typename T > __cuda_callable__ inline auto tan( const T& value ) -> decltype( std::tan(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::tan( value ); #else return std::tan( value ); @@ -299,7 +291,7 @@ template< typename T > __cuda_callable__ inline auto asin( const T& value ) -> decltype( std::asin(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::asin( value ); #else return std::asin( value ); @@ -313,7 +305,7 @@ template< typename T > __cuda_callable__ inline auto acos( const T& value ) -> decltype( std::acos(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::acos( value ); #else return std::acos( value ); @@ -327,7 +319,7 @@ template< typename T > __cuda_callable__ inline auto atan( const T& value ) -> decltype( std::atan(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::atan( value ); #else return std::atan( value ); @@ -341,7 +333,7 @@ template< typename T > __cuda_callable__ inline auto sinh( const T& value ) -> decltype( std::sinh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::sinh( value ); 
#else return std::sinh( value ); @@ -355,7 +347,7 @@ template< typename T > __cuda_callable__ inline auto cosh( const T& value ) -> decltype( std::cosh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::cosh( value ); #else return std::cosh( value ); @@ -369,7 +361,7 @@ template< typename T > __cuda_callable__ inline auto tanh( const T& value ) -> decltype( std::tanh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::tanh( value ); #else return std::tanh( value ); @@ -383,7 +375,7 @@ template< typename T > __cuda_callable__ inline auto asinh( const T& value ) -> decltype( std::asinh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::asinh( value ); #else return std::asinh( value ); @@ -397,7 +389,7 @@ template< typename T > __cuda_callable__ inline auto acosh( const T& value ) -> decltype( std::acosh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::acosh( value ); #else return std::acosh( value ); @@ -411,7 +403,7 @@ template< typename T > __cuda_callable__ inline auto atanh( const T& value ) -> decltype( std::atanh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::atanh( value ); #else return std::atanh( value ); @@ -425,7 +417,7 @@ template< typename T > __cuda_callable__ inline auto floor( const T& value ) -> decltype( std::floor(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::floor( value ); #else return std::floor( value ); @@ -439,7 +431,7 @@ template< typename T > __cuda_callable__ inline auto ceil( const T& value ) -> decltype( std::ceil(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::ceil( value ); #else return std::ceil( value ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 74ff682fd..cddf6f9a7 100644 --- 
a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -831,38 +831,6 @@ class CSRDeviceDependentCode< Devices::Host > }; -#ifdef HAVE_MIC -template<> -class CSRDeviceDependentCode< Devices::MIC > -{ - public: - - typedef Devices::MIC Device; - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const CSR< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - throw Exceptions::NotImplementedError("CSRDeviceDependentCode is not implemented for MIC."); - } - /* const Index rows = matrix.getRows(); - const tnlCSRMatrix< Real, Device, Index >* matrixPtr = &matrix; - const InVector* inVectorPtr = &inVector; - OutVector* outVectorPtr = &outVector; -#ifdef HAVE_OPENMP -#pragma omp parallel for firstprivate( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < rows; row ++ ) - ( *outVectorPtr )[ row ] = matrixPtr->rowVectorProduct( row, *inVectorPtr ); - }*/ - -}; -#endif - #ifdef HAVE_CUDA template< typename Real, typename Index, diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 016edf699..4ce70d3ef 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -638,9 +638,9 @@ template< typename Real, SlicedEllpack< Real, Device, Index, SliceSize >& SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value || std::is_same< Device, Devices::MIC >::value, + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value || std::is_same< Device2, 
Devices::MIC >::value, + static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, "unknown device" ); this->setLike( matrix ); @@ -693,10 +693,6 @@ SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< } } } - - if( std::is_same< Device, Devices::MIC >::value ) { - throw Exceptions::NotImplementedError("Cross-device assignment for the SlicedEllpack format is not implemented for MIC."); - } return *this; } @@ -1064,61 +1060,6 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > cudaDeviceSynchronize(); #endif } - -}; - -template<> -class SlicedEllpackDeviceDependentCode< Devices::MIC > -{ - public: - - typedef Devices::MIC Device; - - template< typename Real, - typename Index, - int SliceSize > - static void initRowTraverse( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const Index row, - Index& rowBegin, - Index& rowEnd, - Index& step ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverse"); - } - - template< typename Real, - typename Index, - int SliceSize > - __cuda_callable__ - static void initRowTraverseFast( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const Index row, - Index& rowBegin, - Index& rowEnd, - Index& step ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverseFast"); - } - - template< typename Real, - typename Index, - int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::computeMaximalRowLengthInSlices"); - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector, 
- int SliceSize > - static void vectorProduct( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::vectorProduct"); - } }; } // namespace Matrices diff --git a/src/TNL/Meshes/GridDetails/GridTraverser.h b/src/TNL/Meshes/GridDetails/GridTraverser.h index fb6b34da1..7ce106f5d 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser.h +++ b/src/TNL/Meshes/GridDetails/GridTraverser.h @@ -89,38 +89,6 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Cuda, Index > > const int& stream = 0 ); }; -/**** - * 1D grid, Devices::MIC - */ -template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > > -{ - public: - - typedef Meshes::Grid< 1, Real, Devices::MIC, Index > GridType; - typedef Pointers::SharedPointer< GridType > GridPointer; - typedef Real RealType; - typedef Devices::MIC DeviceType; - typedef Index IndexType; - typedef typename GridType::CoordinatesType CoordinatesType; - - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities > - static void - processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode = synchronousMode, - const int& stream = 0 ); -}; - - /**** * 2D grid, Devices::Host @@ -202,45 +170,6 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > > const GridEntityParameters&... 
gridEntityParameters ); }; -/**** - * 2D grid, Devices::MIC - */ -template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > > -{ - public: - - typedef Meshes::Grid< 2, Real, Devices::MIC, Index > GridType; - typedef Pointers::SharedPointer< GridType > GridPointer; - typedef Real RealType; - typedef Devices::MIC DeviceType; - typedef Index IndexType; - typedef typename GridType::CoordinatesType CoordinatesType; - - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary = 1, - int YOrthogonalBoundary = 1, - typename... GridEntityParameters > - static void - processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - // FIXME: hack around nvcc bug (error: default argument not at end of parameter list) - //GridTraverserMode mode = synchronousMode, - GridTraverserMode mode, - // const int& stream = 0, - const int& stream, - // gridEntityParameters are passed to GridEntity's constructor - // (i.e. orientation and basis for faces) - const GridEntityParameters&... gridEntityParameters ); -}; /**** * 3D grid, Devices::Host @@ -324,51 +253,9 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Cuda, Index > > const GridEntityParameters&... 
gridEntityParameters ); }; -/**** - * 3D grid, Devices::Cuda - */ -template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > > -{ - public: - - typedef Meshes::Grid< 3, Real, Devices::MIC, Index > GridType; - typedef Pointers::SharedPointer< GridType > GridPointer; - typedef Real RealType; - typedef Devices::MIC DeviceType; - typedef Index IndexType; - typedef typename GridType::CoordinatesType CoordinatesType; - - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary = 1, - int YOrthogonalBoundary = 1, - int ZOrthogonalBoundary = 1, - typename... GridEntityParameters > - static void - processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - // FIXME: hack around nvcc bug (error: default argument not at end of parameter list) - //GridTraverserMode mode = synchronousMode, - GridTraverserMode mode, - // const int& stream = 0, - const int& stream, - // gridEntityParameters are passed to GridEntity's constructor - // (i.e. orientation and basis for faces and edges) - const GridEntityParameters&... 
gridEntityParameters ); -}; - } // namespace Meshes } // namespace TNL #include #include #include - diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp index 59989bb2a..533708538 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp @@ -14,7 +14,6 @@ #pragma once -#include #include #include #include @@ -255,69 +254,5 @@ processEntities( #endif } -/**** - * 1D traverser, MIC - */ - -template< typename Real, - typename Index > - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities > -void -GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >:: -processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode, - const int& stream ) -{ - throw Exceptions::NotImplementedError("Not Implemented yet Grid Traverser <1, Real, Device::MIC>"); -/* - auto& pool = CudaStreamPool::getInstance(); - const cudaStream_t& s = pool.getStream( stream ); - - Devices::Cuda::synchronizeDevice(); - if( processOnlyBoundaryEntities ) - { - dim3 cudaBlockSize( 2 ); - dim3 cudaBlocks( 1 ); - GridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > - <<< cudaBlocks, cudaBlockSize, 0, s >>> - ( &gridPointer.template getData< Devices::Cuda >(), - userData, - begin, - end ); - } - else - { - dim3 cudaBlockSize( 256 ); - dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); - - for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) - GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > - <<< cudaBlocks, cudaBlockSize, 0, s >>> - ( &gridPointer.template getData< Devices::Cuda >(), - userData, - begin, - end, - gridXIdx ); - } - 
- // only launches into the stream 0 are synchronized - if( stream == 0 ) - { - cudaStreamSynchronize( s ); - TNL_CHECK_CUDA_DEVICE; - } -*/ -} - - } // namespace Meshes +} // namespace Meshes } // namespace TNL diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp index 50b30c019..3efdb478f 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp @@ -10,7 +10,6 @@ #pragma once -#include #include #include #include @@ -553,104 +552,5 @@ processEntities( #endif } - -/**** - * 2D traverser, MIC - */ -template< typename Real, - typename Index > - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary, - int YOrthogonalBoundary, - typename... GridEntityParameters > -void -GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >:: -processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode, - const int& stream, - const GridEntityParameters&... gridEntityParameters ) -{ - - -#ifdef HAVE_MIC - Devices::MIC::synchronizeDevice(); - - //TOHLE JE PRUSER -- nemim poslat vypustku -- - //GridEntity entity( gridPointer.template getData< Devices::MIC >(), begin, gridEntityParameters... 
); - - - Devices::MICHider hMicGrid; - hMicGrid.pointer=& gridPointer.template getData< Devices::MIC >(); - Devices::MICHider hMicUserData; - hMicUserData.pointer=& userDataPointer.template modifyData(); - TNLMICSTRUCT(begin, const CoordinatesType); - TNLMICSTRUCT(end, const CoordinatesType); - - #pragma offload target(mic) in(sbegin,send,hMicUserData,hMicGrid) - { - - #pragma omp parallel firstprivate( sbegin, send ) - { - TNLMICSTRUCTUSE(begin, const CoordinatesType); - TNLMICSTRUCTUSE(end, const CoordinatesType); - GridEntity entity( *(hMicGrid.pointer), *(kernelbegin) ); - - if( processOnlyBoundaryEntities ) - { - if( YOrthogonalBoundary ) - #pragma omp for - for( auto k = kernelbegin->x(); - k <= kernelend->x(); - k ++ ) - { - entity.getCoordinates().x() = k; - entity.getCoordinates().y() = kernelbegin->y(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - entity.getCoordinates().y() = kernelend->y(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - } - if( XOrthogonalBoundary ) - #pragma omp for - for( auto k = kernelbegin->y(); - k <= kernelend->y(); - k ++ ) - { - entity.getCoordinates().y() = k; - entity.getCoordinates().x() = kernelbegin->x(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - entity.getCoordinates().x() = kernelend->x(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - } - } - else - { - #pragma omp for - for( IndexType y = kernelbegin->y(); y <= kernelend->y(); y ++ ) - for( IndexType x = kernelbegin->x(); x <= kernelend->x(); x ++ ) - { - // std::cerr << x << " " < #include #include #include @@ -488,68 +487,5 @@ processEntities( #endif } -/**** - * 3D traverser, MIC - */ -template< typename Real, - typename Index > - template< - typename GridEntity, - typename EntitiesProcessor, - typename 
UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary, - int YOrthogonalBoundary, - int ZOrthogonalBoundary, - typename... GridEntityParameters > -void -GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >:: -processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode, - const int& stream, - const GridEntityParameters&... gridEntityParameters ) -{ - throw Exceptions::NotImplementedError("Not Implemented yet Grid Traverser <3, Real, Device::MIC>"); - -/* HAVE_CUDA - dim3 cudaBlockSize( 8, 8, 8 ); - dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y ); - cudaBlocks.z = Devices::Cuda::getNumberOfBlocks( end.z() - begin.z() + 1, cudaBlockSize.z ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); - const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y ); - const IndexType cudaZGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.z ); - - auto& pool = CudaStreamPool::getInstance(); - const cudaStream_t& s = pool.getStream( stream ); - - Devices::Cuda::synchronizeDevice(); - for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) - for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) - for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) - GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > - <<< cudaBlocks, cudaBlockSize, 0, s >>> - ( &gridPointer.template getData< Devices::Cuda >(), - userData, - begin, - end, - gridXIdx, - gridYIdx, - gridZIdx, - gridEntityParameters... 
); - - // only launches into the stream 0 are synchronized - if( stream == 0 ) - { - cudaStreamSynchronize( s ); - TNL_CHECK_CUDA_DEVICE; - } - */ -} - } // namespace Meshes +} // namespace Meshes } // namespace TNL diff --git a/src/TNL/Object.h b/src/TNL/Object.h index 24ced9a5c..356b91eda 100644 --- a/src/TNL/Object.h +++ b/src/TNL/Object.h @@ -128,9 +128,7 @@ class Object * Since it is not defined as \ref __cuda_callable__, objects inherited * from Object should not be created in CUDA kernels. */ -#ifndef HAVE_MIC virtual ~Object(){}; -#endif }; /** diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index b0c0a934f..7c0982dca 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include // std::memcpy, std::memcmp @@ -470,288 +469,9 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer Object* cuda_pointer; }; -/**** - * Specialization for MIC - */ - -#ifdef HAVE_MIC -template< typename Object > -class DevicePointer< Object, Devices::MIC > : public SmartPointer -{ - private: - // Convenient template alias for controlling the selection of copy- and - // move-constructors and assignment operators using SFINAE. - // The type Object_ is "enabled" iff Object_ and Object are not the same, - // but after removing const and volatile qualifiers they are the same. - template< typename Object_ > - using Enabler = std::enable_if< ! 
std::is_same< Object_, Object >::value && - std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >; - - // friend class will be needed for templated assignment operators - template< typename Object_, typename Device_ > - friend class DevicePointer; - - public: - - typedef Object ObjectType; - typedef Devices::MIC DeviceType; - - explicit DevicePointer( ObjectType& obj ) - : pointer( nullptr ), - pd( nullptr ), - mic_pointer( nullptr ) - { - this->allocate( obj ); - } - - // this is needed only to avoid the default compiler-generated constructor - DevicePointer( const DevicePointer& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - DevicePointer( const DevicePointer< Object_, DeviceType >& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // this is needed only to avoid the default compiler-generated constructor - DevicePointer( DevicePointer&& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pointer = nullptr; - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - DevicePointer( DevicePointer< Object_, DeviceType >&& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pointer = nullptr; - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - const Object* operator->() const - { - return this->pointer; - } - - Object* operator->() - { - this->pd->maybe_modified = true; - return this->pointer; - } - - const 
Object& operator *() const - { - return *( this->pointer ); - } - - Object& operator *() - { - this->pd->maybe_modified = true; - return *( this->pointer ); - } - - operator bool() - { - return this->pd; - } - - template< typename Device = Devices::Host > - __cuda_callable__ - const Object& getData() const - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." ); - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - TNL_ASSERT( this->mic_pointer, ); - if( std::is_same< Device, Devices::Host >::value ) - return *( this->pointer ); - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - template< typename Device = Devices::Host > - __cuda_callable__ - Object& modifyData() - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." 
); - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - TNL_ASSERT( this->mic_pointer, ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return *( this->pointer ); - } - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - // this is needed only to avoid the default compiler-generated operator - const DevicePointer& operator=( const DevicePointer& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - this->pd->counter += 1; - return *this; - } - - // conditional operator for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - const DevicePointer& operator=( const DevicePointer< Object_, DeviceType >& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - this->pd->counter += 1; - return *this; - } - - // this is needed only to avoid the default compiler-generated operator - const DevicePointer& operator=( DevicePointer&& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pointer = nullptr; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; - return *this; - } - - // conditional operator for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - const DevicePointer& operator=( DevicePointer< Object_, DeviceType >&& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pointer = nullptr; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; - return *this; - } - - bool synchronize() - { - if( ! 
this->pd ) - return true; - if( this->modified() ) - { - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->mic_pointer, ); - Devices::MIC::CopyToMIC((void*) this->mic_pointer, (void*) this->pointer, sizeof( ObjectType )); - this->set_last_sync_state(); - return true; - } - return true; - - } - - ~DevicePointer() - { - this->free(); - Devices::MIC::removeSmartPointer( this ); - } - - protected: - - struct PointerData - { - char data_image[ sizeof(Object) ]; - int counter = 1; - bool maybe_modified = false; - }; - - bool allocate( ObjectType& obj ) - { - this->pointer = &obj; - this->pd = new PointerData(); - if( ! this->pd ) - return false; - // pass to device - this->mic_pointer = Allocators:::MIC< ObjectType >().allocate(1); - if( ! this->mic_pointer ) - return false; - Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*)this->pointer,sizeof(ObjectType)); - - // set last-sync state - this->set_last_sync_state(); - Devices::MIC::insertSmartPointer( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - std::memcpy( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) - return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) ) != 0; - } - - void free() - { - if( this->pd ) - { - if( ! 
--this->pd->counter ) - { - delete this->pd; - this->pd = nullptr; - if( this->mic_pointer ) - Allocators:::MIC< ObjectType >().deallocate(this->mic_pointer, 1); - } - } - } - - Object* pointer; - - PointerData* pd; - - // mic_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device - Object* mic_pointer; -}; -#endif - } // namespace Pointers -#if (!defined(NDEBUG)) && (!defined(HAVE_MIC)) +#ifndef NDEBUG namespace Assert { template< typename Object, typename Device > diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h index e6908e479..51aff2a78 100644 --- a/src/TNL/Pointers/SharedPointer.h +++ b/src/TNL/Pointers/SharedPointer.h @@ -49,7 +49,7 @@ class SharedPointer } // namespace Pointers -#if (!defined(NDEBUG)) && (!defined(HAVE_MIC)) +#ifndef NDEBUG namespace Assert { template< typename Object, typename Device > @@ -72,4 +72,3 @@ struct Formatter< Pointers::SharedPointer< Object, Device > > #include #include -#include diff --git a/src/TNL/Pointers/SharedPointerMic.h b/src/TNL/Pointers/SharedPointerMic.h deleted file mode 100644 index 0c2958b4a..000000000 --- a/src/TNL/Pointers/SharedPointerMic.h +++ /dev/null @@ -1,373 +0,0 @@ -/*************************************************************************** - SharedPointerMic.h - description - ------------------- - begin : Aug 22, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Tomas Oberhuber, Jakub Klinkovsky - -#pragma once - -#include "SharedPointer.h" - -#include -#include -#include - -#include // std::memcpy, std::memcmp -#include // std::nullptr_t -#include // swap - -namespace TNL { -namespace Pointers { - -#ifdef HAVE_MIC -template< typename Object> -class SharedPointer< Object, Devices::MIC > : public SmartPointer -{ - private: - // Convenient template alias for controlling the selection of copy- and - // move-constructors and assignment operators using SFINAE. - // The type Object_ is "enabled" iff Object_ and Object are not the same, - // but after removing const and volatile qualifiers they are the same. - template< typename Object_ > - using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value && - std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >; - - // friend class will be needed for templated assignment operators - template< typename Object_, typename Device_> - friend class SharedPointer; - - public: - - using ObjectType = Object; - using DeviceType = Devices::MIC; - - SharedPointer( std::nullptr_t ) - : pd( nullptr ), - mic_pointer( nullptr ) - {} - - template< typename... Args > - explicit SharedPointer( Args... args ) - : pd( nullptr ), - mic_pointer( nullptr ) - { - this->allocate( args... 
); - } - - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( const SharedPointer& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( SharedPointer&& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - template< typename... Args > - bool recreate( Args... args ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - if( ! this->pd ) - return this->allocate( args... ); - - if( this->pd->counter == 1 ) - { - /**** - * The object is not shared -> recreate it in-place, without reallocation - */ - this->pd->data.~Object(); - new ( &this->pd->data ) Object( args... ); - Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - this->set_last_sync_state(); - return true; - } - - // free will just decrement the counter - this->free(); - - return this->allocate( args... 
); - } - - const Object* operator->() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return &this->pd->data; - } - - Object* operator->() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return &this->pd->data; - } - - const Object& operator *() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return this->pd->data; - } - - Object& operator *() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return this->pd->data; - } - - operator bool() - { - return this->pd; - } - - template< typename Device = Devices::Host > - __cuda_callable__ - const Object& getData() const - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." ); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - return this->pd->data; - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - - } - - template< typename Device = Devices::Host > - __cuda_callable__ - Object& modifyData() - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." 
); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return this->pd->data; - } - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - - } - - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( const SharedPointer& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - if( this->pd != nullptr ) - this->pd->counter += 1; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - // conditional operator for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - if( this->pd != nullptr ) - this->pd->counter += 1; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( SharedPointer&& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - // conditional operator for non-const -> const data - template< typename 
Object_, - typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - bool synchronize() - { - if( ! this->pd ) - return true; - - if( this->modified() ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl; - std::cerr << " ( " << sizeof( Object ) << " bytes, MIC adress " << this->mic_pointer << " )" << std::endl; -#endif - TNL_ASSERT( this->mic_pointer, ); - - Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - this->set_last_sync_state(); - return true; - } - return false; //?? - } - - void clear() - { - this->free(); - } - - void swap( SharedPointer& ptr2 ) - { - std::swap( this->pd, ptr2.pd ); - std::swap( this->mic_pointer, ptr2.mic_pointer ); - } - - ~SharedPointer() - { - this->free(); - Devices::MIC::removeSmartPointer( this ); - } - - protected: - - struct PointerData - { - Object data; - uint8_t data_image[ sizeof(Object) ]; - int counter; - bool maybe_modified; - - template< typename... Args > - explicit PointerData( Args... args ) - : data( args... ), - counter( 1 ), - maybe_modified( false ) - {} - }; - - template< typename... Args > - bool allocate( Args... args ) - { - this->pd = new PointerData( args... ); - if( ! this->pd ) - return false; - - mic_pointer = Allocators::MIC< Object >().allocate(1); - Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - - if( ! 
this->mic_pointer ) - return false; - // set last-sync state - this->set_last_sync_state(); -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (mic_pointer = " << this->mic_pointer << ")" << std::endl; -#endif - Devices::MIC::insertSmartPointer( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) - return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0; - } - - void free() - { - if( this->pd ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", mic_pointer = " << this->mic_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - if( ! --this->pd->counter ) - { - delete this->pd; - this->pd = nullptr; - if( this->mic_pointer ) - { - Allocators:::MIC< ObjectType >().deallocate(mic_pointer, 1); - mic_pointer=nullptr; - } -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "...deleted data." 
<< std::endl; -#endif - } - } - } - - PointerData* pd; - - // cuda_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device -- Nevím zda to platí pro MIC, asi jo - Object* mic_pointer; -}; -#endif - -} // namespace Pointers -} // namespace TNL diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index cfb7b543f..e85e18d18 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include // std::memcpy, std::memcmp @@ -311,187 +310,9 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer Object* cuda_pointer; }; -#ifdef HAVE_MIC -template< typename Object > -class UniquePointer< Object, Devices::MIC > : public SmartPointer -{ - public: - - typedef Object ObjectType; - typedef Devices::MIC DeviceType; - - UniquePointer( std::nullptr_t ) - : pd( nullptr ), - mic_pointer( nullptr ) - {} - - template< typename... Args > - explicit UniquePointer( const Args... args ) - : pd( nullptr ), - mic_pointer( nullptr ) - { - this->allocate( args... 
); - } - - const Object* operator->() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return &this->pd->data; - } - - Object* operator->() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return &this->pd->data; - } - - const Object& operator *() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return this->pd->data; - } - - Object& operator *() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return this->pd->data; - } - - operator bool() - { - return this->pd; - } - - template< typename Device = Devices::Host > - const Object& getData() const - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." ); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - return this->pd->data; - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - template< typename Device = Devices::Host > - Object& modifyData() - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." 
); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return this->pd->data; - } - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - const UniquePointer& operator=( UniquePointer& ptr ) - { - this->free(); - this->pd = ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; - return *this; - } - - const UniquePointer& operator=( UniquePointer&& ptr ) - { - return this->operator=( ptr ); - } - - bool synchronize() - { - if( ! this->pd ) - return true; - if( this->modified() ) - { - Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - this->set_last_sync_state(); - return true; - } - return true;//?? - } - - ~UniquePointer() - { - this->free(); - Devices::MIC::removeSmartPointer( this ); - } - - protected: - - struct PointerData - { - Object data; - char data_image[ sizeof(Object) ]; - bool maybe_modified; - - template< typename... Args > - explicit PointerData( Args... args ) - : data( args... ), - maybe_modified( false ) - {} - }; - - template< typename... Args > - bool allocate( Args... args ) - { - this->pd = new PointerData( args... ); - if( ! this->pd ) - return false; - // pass to device - this->mic_pointer = Allocators::MIC< Object >().allocate(1); - if( ! 
this->mic_pointer ) - return false; - Devices::MIC::CopyToMIC((void*)mic_pointer,(void*)&this->pd->data,sizeof(Object)); - // set last-sync state - this->set_last_sync_state(); - Devices::MIC::insertSmartPointer( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) - return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) ) != 0; - } - - void free() - { - if( this->pd ) - delete this->pd; - if( this->mic_pointer ) - Allocators:::MIC< ObjectType >().deallocate(mic_pointer, 1); - } - - PointerData* pd; - - // mic_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device - Object* mic_pointer; -}; -#endif - } // namespace Pointers -#if (!defined(NDEBUG)) && (!defined(HAVE_MIC)) +#ifndef NDEBUG namespace Assert { template< typename Object, typename Device > diff --git a/src/TNL/Solvers/BuildConfigTags.h b/src/TNL/Solvers/BuildConfigTags.h index 19bb42129..bcd4cdafc 100644 --- a/src/TNL/Solvers/BuildConfigTags.h +++ b/src/TNL/Solvers/BuildConfigTags.h @@ -27,10 +27,6 @@ template< typename ConfigTag, typename Device > struct ConfigTagDevice{ enum { e template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::Cuda >{ enum { enabled = false }; }; #endif -#ifndef HAVE_MIC -template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::MIC >{ enum { enabled = false }; }; -#endif - /**** * All real types are enabled by default. 
*/ diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h index 1fc2fa3fa..97bc854ce 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h @@ -199,29 +199,6 @@ public: } }; -template< typename Matrix, typename Real, typename Index > -class ILU0_impl< Matrix, Real, Devices::MIC, Index > -: public Preconditioner< Matrix > -{ -public: - using RealType = Real; - using DeviceType = Devices::MIC; - using IndexType = Index; - using typename Preconditioner< Matrix >::VectorViewType; - using typename Preconditioner< Matrix >::ConstVectorViewType; - using typename Preconditioner< Matrix >::MatrixPointer; - - virtual void update( const MatrixPointer& matrixPointer ) override - { - throw Exceptions::NotImplementedError("Not Iplemented yet for MIC"); - } - - virtual void solve( ConstVectorViewType b, VectorViewType x ) const override - { - throw Exceptions::NotImplementedError("Not Iplemented yet for MIC"); - } -}; - } // namespace Preconditioners } // namespace Linear } // namespace Solvers diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h index 8f4c27d7a..fa7c814fc 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h @@ -111,29 +111,6 @@ public: } }; -template< typename Matrix, typename Real, typename Index > -class ILUT_impl< Matrix, Real, Devices::MIC, Index > -: public Preconditioner< Matrix > -{ -public: - using RealType = Real; - using DeviceType = Devices::MIC; - using IndexType = Index; - using typename Preconditioner< Matrix >::VectorViewType; - using typename Preconditioner< Matrix >::ConstVectorViewType; - using typename Preconditioner< Matrix >::MatrixPointer; - - virtual void update( const MatrixPointer& matrixPointer ) override - { - throw std::runtime_error("Not Iplemented yet for MIC"); - } - - virtual void solve( ConstVectorViewType b, 
VectorViewType x ) const override - { - throw std::runtime_error("Not Iplemented yet for MIC"); - } -}; - } // namespace Preconditioners } // namespace Linear } // namespace Solvers diff --git a/src/TNL/Solvers/ODE/Euler.h b/src/TNL/Solvers/ODE/Euler.h index 2ba128073..508d77b6a 100644 --- a/src/TNL/Solvers/ODE/Euler.h +++ b/src/TNL/Solvers/ODE/Euler.h @@ -10,12 +10,10 @@ #pragma once -#include #include #include #include #include -#include namespace TNL { namespace Solvers { diff --git a/src/TNL/Solvers/ODE/Euler.hpp b/src/TNL/Solvers/ODE/Euler.hpp index 12da6439b..1cf5001ae 100644 --- a/src/TNL/Solvers/ODE/Euler.hpp +++ b/src/TNL/Solvers/ODE/Euler.hpp @@ -10,9 +10,7 @@ #pragma once -#include -#include -#include +#include namespace TNL { namespace Solvers { @@ -77,7 +75,6 @@ bool Euler< Problem, SolverMonitor > :: solve( DofVectorPointer& _u ) /**** * First setup the supporting meshes k1...k5 and k_tmp. */ - //timer.start(); _k1->setLike( *_u ); auto k1 = _k1->getView(); auto u = _u->getView(); @@ -104,9 +101,7 @@ bool Euler< Problem, SolverMonitor > :: solve( DofVectorPointer& _u ) /**** * Compute the RHS */ - //timer.stop(); this->problem->getExplicitUpdate( time, currentTau, _u, _k1 ); - //timer.start(); RealType lastResidue = this->getResidue(); RealType maxResidue( 0.0 ); diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h index 701c5eb73..e5673d5c1 100644 --- a/src/TNL/Solvers/SolverConfig_impl.h +++ b/src/TNL/Solvers/SolverConfig_impl.h @@ -67,12 +67,6 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri if( ConfigTagDevice< ConfigTag, Devices::Cuda >::enabled ) config.addEntryEnum( "cuda" ); #endif - -#ifdef HAVE_MIC - if( ConfigTagDevice< ConfigTag, Devices::MIC >::enabled ) - config.addEntryEnum( "mic" ); -#endif - /**** * Setup index type. 
diff --git a/src/TNL/Solvers/SolverInitiator_impl.h b/src/TNL/Solvers/SolverInitiator_impl.h index c6bc5ca7f..e54a8fe30 100644 --- a/src/TNL/Solvers/SolverInitiator_impl.h +++ b/src/TNL/Solvers/SolverInitiator_impl.h @@ -12,7 +12,6 @@ #include #include -#include #include #include #include @@ -92,8 +91,6 @@ class SolverInitiatorRealResolver< ProblemSetter, Real, ConfigTag, true > return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Host, ConfigTag >::run( parameters ); if( device == "cuda" ) return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Cuda, ConfigTag >::run( parameters ); - if(device == "mic") - return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::MIC, ConfigTag >::run( parameters ); std::cerr << "The device '" << device << "' is not defined. " << std::endl; return false; } diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index a18471a4a..7151ed441 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -101,27 +101,6 @@ using ArrayTypes = ::testing::Types< Array< float, Devices::Cuda, long >, Array< double, Devices::Cuda, long >, Array< MyData, Devices::Cuda, long > -#endif -#ifdef HAVE_MIC - , - Array< int, Devices::MIC, short >, - Array< long, Devices::MIC, short >, - Array< float, Devices::MIC, short >, - Array< double, Devices::MIC, short >, - // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, short >, - Array< int, Devices::MIC, int >, - Array< long, Devices::MIC, int >, - Array< float, Devices::MIC, int >, - Array< double, Devices::MIC, int >, - // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, int >, - Array< int, Devices::MIC, long >, - Array< long, Devices::MIC, long >, - Array< float, Devices::MIC, long >, - Array< double, Devices::MIC, long > - // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, long > #endif // all array tests should also work with Vector @@ -136,11 
+115,6 @@ using ArrayTypes = ::testing::Types< Vector< float, Devices::Cuda, long >, Vector< double, Devices::Cuda, long > #endif -#ifdef HAVE_MIC - , - Vector< float, Devices::MIC, long >, - Vector< double, Devices::MIC, long > -#endif >; TYPED_TEST_SUITE( ArrayTest, ArrayTypes ); @@ -353,14 +327,6 @@ void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u ) #endif } -template< typename Value, typename Index > -void testArrayElementwiseAccess( Array< Value, Devices::MIC, Index >&& u ) -{ -#ifdef HAVE_MIC - // TODO -#endif -} - TYPED_TEST( ArrayTest, elementwiseAccess ) { using ArrayType = typename TestFixture::ArrayType; diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 35344eecf..7f1fb6941 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -98,27 +98,6 @@ using ViewTypes = ::testing::Types< ArrayView< float, Devices::Cuda, long >, ArrayView< double, Devices::Cuda, long >, ArrayView< MyData, Devices::Cuda, long > -#endif -#ifdef HAVE_MIC - , - ArrayView< int, Devices::MIC, short >, - ArrayView< long, Devices::MIC, short >, - ArrayView< float, Devices::MIC, short >, - ArrayView< double, Devices::MIC, short >, - // TODO: MyData does not work on MIC -// ArrayView< MyData, Devices::MIC, short >, - ArrayView< int, Devices::MIC, int >, - ArrayView< long, Devices::MIC, int >, - ArrayView< float, Devices::MIC, int >, - ArrayView< double, Devices::MIC, int >, - // TODO: MyData does not work on MIC -// ArrayView< MyData, Devices::MIC, int >, - ArrayView< int, Devices::MIC, long >, - ArrayView< long, Devices::MIC, long >, - ArrayView< float, Devices::MIC, long >, - ArrayView< double, Devices::MIC, long >, - // TODO: MyData does not work on MIC -// ArrayView< MyData, Devices::MIC, long >, #endif // all ArrayView tests should also work with VectorView @@ -133,11 +112,6 @@ using ViewTypes = ::testing::Types< VectorView< float, Devices::Cuda, long >, 
VectorView< double, Devices::Cuda, long > #endif -#ifdef HAVE_MIC - , - VectorView< float, Devices::MIC, long >, - VectorView< double, Devices::MIC, long > -#endif >; TYPED_TEST_SUITE( ArrayViewTest, ViewTypes ); @@ -289,14 +263,6 @@ void testArrayViewElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u ) #endif } -template< typename Value, typename Index > -void testArrayViewElementwiseAccess( Array< Value, Devices::MIC, Index >&& u ) -{ -#ifdef HAVE_MIC - // TODO -#endif -} - TYPED_TEST( ArrayViewTest, elementwiseAccess ) { using ArrayType = typename TestFixture::ArrayType; diff --git a/src/UnitTests/Containers/VectorTestSetup.h b/src/UnitTests/Containers/VectorTestSetup.h index 5c342dced..c8ec42bea 100644 --- a/src/UnitTests/Containers/VectorTestSetup.h +++ b/src/UnitTests/Containers/VectorTestSetup.h @@ -76,21 +76,6 @@ using VectorTypes = ::testing::Types< //Vector< Quad< float >, Devices::Cuda, long >, //Vector< Quad< double >, Devices::Cuda, long > #endif -#ifdef HAVE_MIC - , - Vector< int, Devices::MIC, short >, - Vector< long, Devices::MIC, short >, - Vector< float, Devices::MIC, short >, - Vector< double, Devices::MIC, short >, - Vector< int, Devices::MIC, int >, - Vector< long, Devices::MIC, int >, - Vector< float, Devices::MIC, int >, - Vector< double, Devices::MIC, int >, - Vector< int, Devices::MIC, long >, - Vector< long, Devices::MIC, long >, - Vector< float, Devices::MIC, long >, - Vector< double, Devices::MIC, long > -#endif >; TYPED_TEST_SUITE( VectorTest, VectorTypes ); -- GitLab From 4675fbdfa68232528576250cbf10d20a8337a6ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 19 Aug 2019 12:53:56 +0200 Subject: [PATCH 02/35] Devices: replaced getDeviceType() with getType() --- src/TNL/Atomic.h | 4 ++-- src/TNL/Containers/Algorithms/ArrayIO.h | 4 ++-- src/TNL/Containers/Array.hpp | 2 +- src/TNL/Containers/ArrayView.hpp | 2 +- src/TNL/Containers/DistributedArray.hpp | 2 +- src/TNL/Containers/DistributedArrayView.hpp 
| 2 +- src/TNL/Containers/DistributedVector.hpp | 2 +- src/TNL/Containers/DistributedVectorView.hpp | 2 +- src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp | 2 +- .../Containers/Multimaps/StaticEllpackIndexMultimap.hpp | 2 +- src/TNL/Containers/Vector.hpp | 2 +- src/TNL/Containers/VectorView.hpp | 2 +- src/TNL/Devices/Cuda.h | 8 ++------ src/TNL/Devices/Cuda_impl.h | 2 +- src/TNL/Devices/Host.h | 5 +---- src/TNL/Matrices/AdEllpack_impl.h | 2 +- src/TNL/Matrices/BiEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/BiEllpack_impl.h | 2 +- src/TNL/Matrices/COOMatrix_impl.h | 2 +- src/TNL/Matrices/CSR_impl.h | 2 +- src/TNL/Matrices/ChunkedEllpack_impl.h | 4 ++-- src/TNL/Matrices/Dense_impl.h | 2 +- src/TNL/Matrices/EllpackSymmetricGraph_impl.h | 2 +- src/TNL/Matrices/EllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/Ellpack_impl.h | 4 ++-- src/TNL/Matrices/Multidiagonal_impl.h | 2 +- src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h | 2 +- src/TNL/Matrices/SlicedEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/SlicedEllpack_impl.h | 4 ++-- src/TNL/Matrices/Tridiagonal_impl.h | 2 +- src/TNL/Meshes/GridDetails/Grid1D_impl.h | 2 +- src/TNL/Meshes/GridDetails/Grid2D_impl.h | 2 +- src/TNL/Meshes/GridDetails/Grid3D_impl.h | 2 +- src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h | 6 +++--- src/TNL/Pointers/DevicePointer.h | 2 +- src/TNL/Pointers/SharedPointer.h | 2 +- src/TNL/Pointers/UniquePointer.h | 2 +- src/TNL/Problems/PDEProblem_impl.h | 2 +- 38 files changed, 46 insertions(+), 53 deletions(-) diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h index ca36f9676..67d54abe2 100644 --- a/src/TNL/Atomic.h +++ b/src/TNL/Atomic.h @@ -53,7 +53,7 @@ public: { return "Atomic< " + TNL::getType< T >() + ", " + - Devices::Host::getDeviceType() + " >"; + Devices::Host::getType() + " >"; } // CAS loops for updating maximum and minimum @@ -125,7 +125,7 @@ public: { return "Atomic< " + TNL::getType< T >() + ", " + - Devices::Cuda::getDeviceType() + " >"; + Devices::Cuda::getType() + " 
>"; } bool is_lock_free() const noexcept diff --git a/src/TNL/Containers/Algorithms/ArrayIO.h b/src/TNL/Containers/Algorithms/ArrayIO.h index 35d790558..922fc0ae8 100644 --- a/src/TNL/Containers/Algorithms/ArrayIO.h +++ b/src/TNL/Containers/Algorithms/ArrayIO.h @@ -35,7 +35,7 @@ struct ArrayIO< Value, Device, Index, true > { return String( "Containers::Array< " ) + TNL::getType< Value >() + ", " + - Devices::Host::getDeviceType() + ", " + + Devices::Host::getType() + ", " + TNL::getType< Index >() + " >"; } @@ -81,7 +81,7 @@ struct ArrayIO< Value, Device, Index, false > { return String( "Containers::Array< " ) + TNL::getType< Value >() + ", " + - Devices::Host::getDeviceType() + ", " + + Devices::Host::getType() + ", " + TNL::getType< Index >() + " >"; } diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 4a9c484a4..94179c925 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -187,7 +187,7 @@ getType() { return String( "Containers::Array< " ) + TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + " >"; } diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index ea7882516..05cf7e9e3 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -33,7 +33,7 @@ getType() { return String( "Containers::ArrayView< " ) + ", " + TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + " >"; } diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp index b8c655527..af5673a38 100644 --- a/src/TNL/Containers/DistributedArray.hpp +++ b/src/TNL/Containers/DistributedArray.hpp @@ -170,7 +170,7 @@ getType() { return String( "Containers::DistributedArray< " ) + TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + ", " + // TODO: 
communicators don't have a getType method " >"; diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp index 5cb9c10ed..a861ee3a1 100644 --- a/src/TNL/Containers/DistributedArrayView.hpp +++ b/src/TNL/Containers/DistributedArrayView.hpp @@ -194,7 +194,7 @@ getType() { return String( "Containers::DistributedArrayView< " ) + TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + ", " + // TODO: communicators don't have a getType method " >"; diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index dbf8b10b8..e6913b4e6 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -93,7 +93,7 @@ getType() { return String( "Containers::DistributedVector< " ) + TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + ", " + // TODO: communicators don't have a getType method " >"; diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 6a934d8c2..7ecb4e370 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -74,7 +74,7 @@ getType() { return String( "Containers::DistributedVectorView< " ) + TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + ", " + // TODO: communicators don't have a getType method " >"; diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp index 8aaba006a..4b94c9cab 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp @@ -54,7 +54,7 @@ getType() return String( "EllpackIndexMultimap< ") + String( TNL::getType< Index >() ) + String( ", " ) + - Device :: getDeviceType() + 
+ Device::getType() + String( ", " ) + String( TNL::getType< LocalIndexType >() ) + String( " >" ); diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp index c8dcd637e..d1261ff8b 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp @@ -55,7 +55,7 @@ getType() return String( "StaticEllpackIndexMultimap< ") + String( TNL::getType< Index >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( ", " ) + String( TNL::getType< LocalIndexType >() ) + String( " >" ); diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 0468fc749..12a699cb3 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -37,7 +37,7 @@ getType() { return String( "Containers::Vector< " ) + TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + " >"; } diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 7c342703b..9985594a6 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -26,7 +26,7 @@ getType() { return String( "Containers::VectorView< " ) + TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + " >"; } diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 783101415..9ed546c45 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -25,12 +25,8 @@ namespace Devices { class Cuda { - public: - - static inline String getDeviceType(); - - // TODO: Remove getDeviceType(); - static inline String getType() { return getDeviceType();}; +public: + static inline String getType(); static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Devices/Cuda_impl.h 
b/src/TNL/Devices/Cuda_impl.h index 07e2c1ddc..6703ec0b6 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -21,7 +21,7 @@ namespace TNL { namespace Devices { -inline String Cuda::getDeviceType() +inline String Cuda::getType() { return String( "Devices::Cuda" ); } diff --git a/src/TNL/Devices/Host.h b/src/TNL/Devices/Host.h index 40f55711a..b48ee98bc 100644 --- a/src/TNL/Devices/Host.h +++ b/src/TNL/Devices/Host.h @@ -25,14 +25,11 @@ namespace Devices { class Host { public: - static String getDeviceType() + static String getType() { return String( "Devices::Host" ); } - // TODO: Remove getDeviceType(); - static inline String getType() { return getDeviceType();}; - static void disableOMP() { ompEnabled() = false; diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h index 12d7336b8..d69f6c4da 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/AdEllpack_impl.h @@ -173,7 +173,7 @@ String AdEllpack< Real, Device, Index >::getType() return String( "AdEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device::getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/BiEllpackSymmetric_impl.h index d4c755a23..b30cd4d4e 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h @@ -54,7 +54,7 @@ String BiEllpackSymmetric< Real, Device, Index, StripSize >::getType() return String( "BiEllpackMatrix< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 0be6ac4b0..441f040cf 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -56,7 +56,7 @@ String BiEllpack< Real, Device, Index, StripSize >::getType() return String( "BiEllpack< ") + String( TNL::getType< Real >() ) + 
String( ", " ) + - Device::getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/COOMatrix_impl.h b/src/TNL/Matrices/COOMatrix_impl.h index 090ccd118..1647d684d 100644 --- a/src/TNL/Matrices/COOMatrix_impl.h +++ b/src/TNL/Matrices/COOMatrix_impl.h @@ -35,7 +35,7 @@ String COOMatrix< Real, Device, Index >::getType() return String("COOMatrix< ") + String(TNL::getType< Real>()) + String(", ") + - Device::getDeviceType() + + Device::getType() + String(" >"); } diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index cddf6f9a7..9cf962286 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -46,7 +46,7 @@ String CSR< Real, Device, Index >::getType() return String( "Matrices::CSR< ") + String( TNL::getType< Real>() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 6106ba2cd..5ba898f0c 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -44,7 +44,7 @@ String ChunkedEllpack< Real, Device, Index >::getType() return String( "Matrices::ChunkedEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( " >" ); } @@ -314,7 +314,7 @@ template< typename Real, Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); + return matrixRow.getNonZeroElementsCount( Device::getType() ); // IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h index 5b55dbc29..357bc8bfc 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense_impl.h @@ -31,7 +31,7 @@ String Dense< Real, Device, Index >::getType() { 
return String( "Matrices::Dense< " ) + String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + + String( Device::getType() ) + ", " + String( TNL::getType< IndexType >() ) + " >"; } diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index 6304d5f9d..eff31f4b7 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -50,7 +50,7 @@ String EllpackSymmetricGraph< Real, Device, Index > :: getType() return String( "EllpackSymmetricGraph< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device::getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index 7207afc56..da65f22f1 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -34,7 +34,7 @@ String EllpackSymmetric< Real, Device, Index > :: getType() return String( "EllpackSymmetric< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device::getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index b4e453793..e42bf5e42 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -34,7 +34,7 @@ String Ellpack< Real, Device, Index > :: getType() return String( "Matrices::Ellpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( ", " ) + String( TNL::getType< Index >() ) + String( " >" ); @@ -130,7 +130,7 @@ template< typename Real, Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); + return matrixRow.getNonZeroElementsCount( Device::getType() ); } template< typename Real, diff --git 
a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h index 065e7780d..b9641babd 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Multidiagonal_impl.h @@ -36,7 +36,7 @@ String Multidiagonal< Real, Device, Index > :: getType() return String( "Matrices::Multidiagonal< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h index 866211d53..e126f2c75 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h @@ -36,7 +36,7 @@ String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getType() return String( "SlicedEllpackSymmetricGraph< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device::getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index c9dee062c..5f0af6d30 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -35,7 +35,7 @@ String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getType() return String( "SlicedEllpackSymmetric< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( " >" ); } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 4ce70d3ef..0517e913f 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -35,7 +35,7 @@ String SlicedEllpack< Real, Device, Index, SliceSize >::getType() return String( "Matrices::SlicedEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + - Device :: getDeviceType() + + Device::getType() + String( " >" ); } @@ -129,7 +129,7 @@ template< typename 
Real, Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); + return matrixRow.getNonZeroElementsCount( Device::getType() ); } template< typename Real, diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h index 9a2d5e4a8..2a77a8a59 100644 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ b/src/TNL/Matrices/Tridiagonal_impl.h @@ -34,7 +34,7 @@ String Tridiagonal< Real, Device, Index >::getType() { return String( "Matrices::Tridiagonal< " ) + String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + + String( Device::getType() ) + ", " + String( TNL::getType< IndexType >() ) + " >"; } diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h index a747544df..55055c818 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h @@ -52,7 +52,7 @@ String Grid< 1, Real, Device, Index >::getType() return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + String( TNL::getType< RealType >() ) + ", " + - String( Device::getDeviceType() ) + ", " + + String( Device::getType() ) + ", " + String( TNL::getType< IndexType >() ) + " >"; } diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h index 7b673e0a8..c6c0420b2 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h @@ -59,7 +59,7 @@ String Grid< 2, Real, Device, Index > :: getType() return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + + String( Device::getType() ) + ", " + String( TNL::getType< IndexType >() ) + " >"; } diff --git a/src/TNL/Meshes/GridDetails/Grid3D_impl.h 
b/src/TNL/Meshes/GridDetails/Grid3D_impl.h index dbd5fcf47..d2d86c8ba 100644 --- a/src/TNL/Meshes/GridDetails/Grid3D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid3D_impl.h @@ -73,7 +73,7 @@ String Grid< 3, Real, Device, Index > :: getType() return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + + String( Device::getType() ) + ", " + String( TNL::getType< IndexType >() ) + " >"; } diff --git a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h index 46c4e9c58..7feba3d02 100644 --- a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h +++ b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h @@ -334,9 +334,9 @@ MeshTypeResolver< Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs... resolveTerminate( const Reader& reader, ProblemSetterArgs&&... problemSetterArgs ) { - std::cerr << "The mesh config type " << TNL::getType< MeshConfig >() << " is disabled in the build configuration for device " << Device::getDeviceType() << "." << std::endl; + std::cerr << "The mesh config type " << TNL::getType< MeshConfig >() << " is disabled in the build configuration for device " << Device::getType() << "." << std::endl; return false; -}; +} template< typename Reader, typename ConfigTag, @@ -352,7 +352,7 @@ resolveTerminate( const Reader& reader, { using MeshType = Meshes::Mesh< MeshConfig, Device >; return ProblemSetter< MeshType >::run( std::forward(problemSetterArgs)... 
); -}; +} } // namespace Meshes } // namespace TNL diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index 7c0982dca..f384c75e5 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -481,7 +481,7 @@ struct Formatter< Pointers::DevicePointer< Object, Device > > printToString( const Pointers::DevicePointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(DevicePointer< " << Object::getType() << ", " << Device::getDeviceType() + ss << "(DevicePointer< " << Object::getType() << ", " << Device::getType() << " > object at " << &value << ")"; return ss.str(); } diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h index 51aff2a78..05383c39e 100644 --- a/src/TNL/Pointers/SharedPointer.h +++ b/src/TNL/Pointers/SharedPointer.h @@ -59,7 +59,7 @@ struct Formatter< Pointers::SharedPointer< Object, Device > > printToString( const Pointers::SharedPointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(SharedPointer< " << Object::getType() << ", " << Device::getDeviceType() + ss << "(SharedPointer< " << Object::getType() << ", " << Device::getType() << " > object at " << &value << ")"; return ss.str(); } diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index e85e18d18..a2a8551ec 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -322,7 +322,7 @@ struct Formatter< Pointers::UniquePointer< Object, Device > > printToString( const Pointers::UniquePointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(UniquePointer< " << Object::getType() << ", " << Device::getDeviceType() + ss << "(UniquePointer< " << Object::getType() << ", " << Device::getType() << " > object at " << &value << ")"; return ss.str(); } diff --git a/src/TNL/Problems/PDEProblem_impl.h b/src/TNL/Problems/PDEProblem_impl.h index 151f1e2ac..ba7612de7 100644 --- a/src/TNL/Problems/PDEProblem_impl.h +++ 
b/src/TNL/Problems/PDEProblem_impl.h @@ -28,7 +28,7 @@ getType() return String( "PDEProblem< " ) + Mesh::getType() + ", " + TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + + Device::getType() + ", " + TNL::getType< Index >() + " >"; } -- GitLab From 6d17baa38fc84cbc508308d888f7ff7b68a8b8c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 20 Aug 2019 11:37:02 +0200 Subject: [PATCH 03/35] Refactoring VectorFieldVTKWriter Fixes #11 --- src/TNL/Functions/MeshFunctionGnuplotWriter.h | 28 +- src/TNL/Functions/MeshFunctionVTKWriter.h | 2 +- src/TNL/Functions/VectorFieldGnuplotWriter.h | 138 +-- .../Functions/VectorFieldGnuplotWriter_impl.h | 111 +-- src/TNL/Functions/VectorFieldVTKWriter.h | 281 +----- src/TNL/Functions/VectorFieldVTKWriter_impl.h | 881 ------------------ 6 files changed, 154 insertions(+), 1287 deletions(-) delete mode 100644 src/TNL/Functions/VectorFieldVTKWriter_impl.h diff --git a/src/TNL/Functions/MeshFunctionGnuplotWriter.h b/src/TNL/Functions/MeshFunctionGnuplotWriter.h index d747e84a7..244146ff6 100644 --- a/src/TNL/Functions/MeshFunctionGnuplotWriter.h +++ b/src/TNL/Functions/MeshFunctionGnuplotWriter.h @@ -68,11 +68,10 @@ template< typename MeshFunction, class MeshFunctionGnuplotWriter : public MeshFunctionGnuplotWriterBase { - public: - - using MeshType = typename MeshFunction::MeshType; - using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; - using GlobalIndex = typename MeshType::GlobalIndexType; +public: + using MeshType = typename MeshFunction::MeshType; + using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; static bool write( const MeshFunction& function, std::ostream& str, @@ -99,11 +98,10 @@ template< typename MeshFunction, class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 2, Real, Device, Index >, EntityDimension > : public 
MeshFunctionGnuplotWriterBase { - public: - - using MeshType = typename MeshFunction::MeshType; - using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; - using GlobalIndex = typename MeshType::GlobalIndexType; +public: + using MeshType = typename MeshFunction::MeshType; + using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; static bool write( const MeshFunction& function, std::ostream& str, @@ -137,11 +135,10 @@ template< typename MeshFunction, class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 3, Real, Device, Index >, EntityDimension > : public MeshFunctionGnuplotWriterBase { - public: - - using MeshType = typename MeshFunction::MeshType; - using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; - using GlobalIndex = typename MeshType::GlobalIndexType; +public: + using MeshType = typename MeshFunction::MeshType; + using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; static bool write( const MeshFunction& function, std::ostream& str, @@ -167,6 +164,5 @@ class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 3, Real, Device, In } }; - } // namespace Functions } // namespace TNL diff --git a/src/TNL/Functions/MeshFunctionVTKWriter.h b/src/TNL/Functions/MeshFunctionVTKWriter.h index 78608de74..201178c61 100644 --- a/src/TNL/Functions/MeshFunctionVTKWriter.h +++ b/src/TNL/Functions/MeshFunctionVTKWriter.h @@ -13,7 +13,7 @@ #include namespace TNL { -namespace Functions { +namespace Functions { template< typename MeshFunction > class MeshFunctionVTKWriter diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter.h b/src/TNL/Functions/VectorFieldGnuplotWriter.h index 41b59d511..a1a63883e 100644 --- a/src/TNL/Functions/VectorFieldGnuplotWriter.h +++ 
b/src/TNL/Functions/VectorFieldGnuplotWriter.h @@ -16,15 +16,15 @@ namespace TNL { namespace Functions { template< int, typename > class VectorField; +template< typename, int, typename > class MeshFunction; template< typename VectorField > class VectorFieldGnuplotWriter { - public: - - static bool write( const VectorField& function, - std::ostream& str, - const double& scale ); +public: + static bool write( const VectorField& function, + std::ostream& str, + const double& scale ); }; /*** @@ -37,14 +37,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > > { - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 1, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -57,14 +57,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > > { - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 1, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = 
Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; @@ -78,14 +78,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > > { - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -98,14 +98,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > > { - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -118,14 +118,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< 
VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > > { - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; @@ -139,14 +139,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > > { - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -159,14 +159,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > > { - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - - static 
bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -179,14 +179,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > > { - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; } // namespace Functions diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h index 500bdc4d8..ebda5972a 100644 --- a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h +++ b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h @@ -43,9 +43,8 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Cell entity( mesh ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + auto& c = entity.getCoordinates(); + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -73,9 
+72,8 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Vertex entity( mesh ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + auto& c = entity.getCoordinates(); + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -104,13 +102,10 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Cell entity( mesh ); - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -142,15 +137,12 @@ write( const VectorFieldType& vectorField, typedef typename MeshType::Face EntityType; typedef typename EntityType::EntityOrientationType EntityOrientation; EntityType entity( mesh ); + auto& c = entity.getCoordinates(); entity.setOrientation( EntityOrientation( 1.0, 0.0 ) ); - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -163,15 +155,9 @@ write( const VectorFieldType& vectorField, } entity.setOrientation( EntityOrientation( 0.0, 1.0 ) ); - for( 
entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) - + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) - + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -202,13 +188,10 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Vertex entity( mesh ); - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -239,16 +222,11 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Cell entity( mesh ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -280,18 +258,13 @@ 
write( const VectorFieldType& vectorField, typedef typename MeshType::Face EntityType; typedef typename EntityType::EntityOrientationType EntityOrientation; EntityType entity( mesh ); + auto& c = entity.getCoordinates(); entity.setOrientation( EntityOrientation( 1.0, 0.0, 0.0 ) ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -304,16 +277,10 @@ write( const VectorFieldType& vectorField, } entity.setOrientation( EntityOrientation( 0.0, 1.0, 0.0 ) ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -326,16 +293,10 @@ write( const VectorFieldType& vectorField, } entity.setOrientation( EntityOrientation( 0.0, 0.0, 1.0 ) ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) - 
for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -366,16 +327,11 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Vertex entity( mesh ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() <= mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.z() = 0; c.z() <= mesh.getDimensions().z(); c.z()++ ) + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -391,4 +347,3 @@ write( const VectorFieldType& vectorField, } // namespace Functions } // namespace TNL - diff --git a/src/TNL/Functions/VectorFieldVTKWriter.h b/src/TNL/Functions/VectorFieldVTKWriter.h index 6d8b1a853..5eceea57f 100644 --- a/src/TNL/Functions/VectorFieldVTKWriter.h +++ b/src/TNL/Functions/VectorFieldVTKWriter.h @@ -2,7 +2,7 @@ VectorFieldVTKWriter.h - description ------------------- begin : Jan 10, 2018 - copyright : (C) 2018 by oberhuber + copyright : (C) 2018 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -10,255 +10,52 @@ #pragma once -#include +#include namespace TNL { namespace Functions { -template< int, typename > class VectorField; - template< typename VectorField > class VectorFieldVTKWriter { - public: - - static bool write( const VectorField& vectorField, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorField& vectorField, - std::ostream& str ){} - -}; - -/*** - * 1D grids cells - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > > -{ - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 1D grids vertices - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > > -{ - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - 
std::ostream& str ); - -}; - - -/*** - * 2D grids cells - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > > -{ - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 2D grids faces - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > > -{ - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 2D grids vertices - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > > -{ - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > 
> VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - - -/*** - * 3D grids cells - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 3D grids faces - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 3D grids edges - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< 
Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 3D grids vertices - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - + using MeshType = typename VectorField::MeshType; + using MeshWriter = Meshes::Writers::VTKWriter< MeshType >; + using EntityType = typename MeshType::template EntityType< VectorField::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; + +public: + static bool write( const VectorField& field, + std::ostream& str, + const double& scale = 1.0, + const String& fieldName = "cellVectorFieldValues" ) + { + const MeshType& mesh = field.getMesh(); + MeshWriter::template writeEntities< VectorField::getEntitiesDimension() >( mesh, str ); + appendField( field, str, fieldName, scale ); + return true; + } + + // VTK supports writing multiple fields into the same file. 
+ // You can call this after 'write', which initializes the mesh entities, + // with different field name. + static void appendField( const VectorField& field, + std::ostream& str, + const String& fieldName, + const double& scale = 1.0 ) + { + const MeshType& mesh = field.getMesh(); + const GlobalIndex entitiesCount = mesh.template getEntitiesCount< EntityType >(); + str << std::endl << "CELL_DATA " << entitiesCount << std::endl; + str << "VECTORS " << fieldName << " " << getType< typename VectorField::RealType >() << " 1" << std::endl; + for( GlobalIndex i = 0; i < entitiesCount; i++ ) { + const typename VectorField::VectorType vector = field.getElement( i ); + static_assert( VectorField::getVectorDimension() <= 3, "The VTK format supports only up to 3D vector fields." ); + for( int i = 0; i < 3; i++ ) + str << scale * ( i < vector.getSize() ? vector[ i ] : 0.0 ) << " "; + str << "\n"; + } + } }; } // namespace Functions } // namespace TNL - -#include diff --git a/src/TNL/Functions/VectorFieldVTKWriter_impl.h b/src/TNL/Functions/VectorFieldVTKWriter_impl.h deleted file mode 100644 index 938227d22..000000000 --- a/src/TNL/Functions/VectorFieldVTKWriter_impl.h +++ /dev/null @@ -1,881 +0,0 @@ -/*************************************************************************** - VectorFieldVTKWriter_impl.h - description - ------------------- - begin : Jan 10, 2018 - copyright : (C) 2018 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include - -namespace TNL { -namespace Functions { - -template< typename VectorField > -bool -VectorFieldVTKWriter< VectorField >:: -write( const VectorField& vectorField, - std::ostream& str, - const double& scale ) -{ - std::cerr << "VTK writer for vector field defined on mesh type " << VectorField::MeshType::getType() << " is not (yet) implemented." 
<< std::endl; - return false; -} - -/**** - * 1D grid, cells - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType origin = mesh.getOrigin().x(); - const RealType spaceStep = mesh.getSpaceSteps().x(); - - str << "POINTS " << mesh.getDimensions().x() + 1 << " float" << std::endl; - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << origin + i * spaceStep << " 0 0" << std::endl; - } - - str << std::endl << "CELLS " << mesh.getDimensions().x() << " " << mesh.getDimensions().x() * 3 << std::endl; - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "2 " << i << " " << i+1 << std::endl; - } - - str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() << std::endl; - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "3 " << std::endl; - } - - str << std::endl << "CELL_DATA " << mesh.getDimensions().x() << std::endl; - str << "VECTORS cellVectorFieldValues " << 
getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < mesh.template getEntitiesCount< typename MeshType::Cell >(); i++ ) - { - typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 1D grid, vertices - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType origin = mesh.getOrigin().x(); - const RealType spaceStep = mesh.getSpaceSteps().x(); - - str << "POINTS " << mesh.getDimensions().x() + 1 << " float" << std::endl; - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << origin + i * spaceStep << " 0 0" << std::endl; - } - - str << std::endl << "CELLS " << mesh.getDimensions().x() + 1 << 
" " << ( mesh.getDimensions().x() + 1 ) * 2 << std::endl; - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << "1 " << i << std::endl; - } - - str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() + 1 << std::endl; - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << "1 " << std::endl; - } - - str << std::endl << "CELL_DATA " << mesh.getDimensions().x() + 1 << std::endl; - str << "VECTORS VerticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < mesh.template getEntitiesCount< typename MeshType::Vertex >(); i++ ) - { - typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 2D grid, cells - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const 
double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Cell >(); - - str << "POINTS " << verticesCount << " " << getType< RealType >() << std::endl; - for (int j = 0; j < mesh.getDimensions().y() + 1; j++) - { - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl; - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 5 << std::endl; - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "4 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " << j * ( mesh.getDimensions().x() + 1 )+ i + 1 << - " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - - str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() * mesh.getDimensions().y() << std::endl; - for (int i = 0; i < mesh.getDimensions().x()*mesh.getDimensions().y(); i++) - { - str << "8 " << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS cellVectorFieldValues " << getType< RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - { - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ); - if( i < 2 ) - str << " "; - } - str << std::endl; - } - - return true; -} - -/**** - * 2D grid, faces - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - typedef typename MeshType::template EntityType< 0 > Vertex; - typedef typename MeshType::template EntityType< 1 > Face; - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Face >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int j = 0; j < ( mesh.getDimensions().y() + 1); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << originX + i * spaceStepX << " " << 
originY + j * spaceStepY << " 0" << std::endl; - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 3 << std::endl; - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << "2 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - - for (int j = 0; j < (mesh.getDimensions().y()+1); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "2 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " <() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Face entity = mesh.template getEntity< typename MeshType::Face >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 2D grid, vertices - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >:: -write( const VectorFieldType& vectorField, - 
std::ostream& str, - const double& scale ) -{ - typedef typename MeshType::template EntityType< 0 > Vertex; - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int j = 0; j < ( mesh.getDimensions().y() + 1); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl; - } - } - - str << std::endl << "CELLS " << verticesCount << " " << verticesCount * 2 << std::endl; - for (int j = 0; j < ( mesh.getDimensions().y() + 1 ); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << "1 " << j * mesh.getDimensions().x() + i << std::endl; - } - } - - str << std::endl << "CELL_TYPES " << verticesCount << std::endl; - for (int i = 0; i < verticesCount; i++) - { - str << "1" << std::endl; - } - - str << std::endl << "CELL_DATA " << verticesCount << std::endl; - str << "VECTORS VerticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < verticesCount; i++ ) - { - typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, cells - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Cell >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << 
originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << - entitiesCount * 9 << std::endl; - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "8 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "11" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS cellVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( 
entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, faces - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Face >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for 
(int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 5 << std::endl; - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - } - - for (int k = 0; k <= mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j 
* ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1<< std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "8" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS facesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Face entity = mesh.template getEntity< typename MeshType::Face >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, edges - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Edge >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << 
originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 3 << std::endl; - for (int k = 0; k <= mesh.getDimensions().z(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - } - - for (int k = 0; k <= mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "3" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS edgesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Edge entity = mesh.template 
getEntity< typename MeshType::Edge >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, vertices - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); 
j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << verticesCount << " " << verticesCount * 2 << std::endl; - for (int k = 0; k < ( mesh.getDimensions().z() + 1 ); k++) - { - for (int j = 0; j < ( mesh.getDimensions().y() + 1 ); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << "1 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << verticesCount << std::endl; - for (int i = 0; i < verticesCount; i++) - { - str << "1" << std::endl; - } - - str << std::endl << "CELL_DATA " << verticesCount << std::endl; - str << "VECTORS verticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < verticesCount; i++ ) - { - typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -} // namespace Functions -} // namespace TNL -- GitLab From 826332e4e9793ae031714400155778b122e240f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 20 Aug 2019 19:56:23 +0200 Subject: [PATCH 04/35] Removed useless operator<< for TNL::String The implementation for std::string (which is a base class of TNL::String) is perfectly sufficient. 
--- src/TNL/String.h | 9 --------- src/TNL/String.hpp | 6 ------ 2 files changed, 15 deletions(-) diff --git a/src/TNL/String.h b/src/TNL/String.h index a04802216..63924c27f 100644 --- a/src/TNL/String.h +++ b/src/TNL/String.h @@ -21,8 +21,6 @@ namespace TNL { -class String; - /** * \brief Class for managing strings. * @@ -39,8 +37,6 @@ class String; * * \ref operator+ * - * \ref operator<< - * * \ref mpiSend * * \ref mpiReceive @@ -368,11 +364,6 @@ String operator+( const char* string1, const String& string2 ); */ String operator+( const std::string& string1, const String& string2 ); -/** - * \brief Writes the string \e str to given \e stream - */ -std::ostream& operator<<( std::ostream& stream, const String& str ); - /** * \brief Converts \e value of type \e T to a String. * diff --git a/src/TNL/String.hpp b/src/TNL/String.hpp index 4cdeee7ac..f9846fa6d 100644 --- a/src/TNL/String.hpp +++ b/src/TNL/String.hpp @@ -243,12 +243,6 @@ inline String operator+( const std::string& string1, const String& string2 ) return String( string1 ) + string2; } -inline std::ostream& operator<<( std::ostream& stream, const String& str ) -{ - stream << str.getString(); - return stream; -} - #ifdef HAVE_MPI inline void mpiSend( const String& str, int target, int tag, MPI_Comm mpi_comm ) { -- GitLab From 203ee514b4bc3d9580f5836f8f993ebef878a072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 20 Aug 2019 19:59:05 +0200 Subject: [PATCH 05/35] Removed custom implementation of std::make_unique which is available in STL since C++14 --- src/TNL/Config/ConfigDescription.h | 1 - src/TNL/Config/ParameterContainer.h | 1 - src/TNL/Config/make_unique.h | 15 --------------- 3 files changed, 17 deletions(-) delete mode 100644 src/TNL/Config/make_unique.h diff --git a/src/TNL/Config/ConfigDescription.h b/src/TNL/Config/ConfigDescription.h index dc32c1684..06ed1ba8a 100644 --- a/src/TNL/Config/ConfigDescription.h +++ b/src/TNL/Config/ConfigDescription.h @@ -14,7 +14,6 @@ 
#include #include #include -#include "make_unique.h" #include #include diff --git a/src/TNL/Config/ParameterContainer.h b/src/TNL/Config/ParameterContainer.h index b298234d8..b8315d3ec 100644 --- a/src/TNL/Config/ParameterContainer.h +++ b/src/TNL/Config/ParameterContainer.h @@ -12,7 +12,6 @@ #include #include -#include "make_unique.h" #include //#include diff --git a/src/TNL/Config/make_unique.h b/src/TNL/Config/make_unique.h deleted file mode 100644 index 4a4078a02..000000000 --- a/src/TNL/Config/make_unique.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -// std::make_unique does not exist until C++14 -// https://stackoverflow.com/a/9657991 -#if __cplusplus < 201402L -#include - -namespace std { - template - std::unique_ptr make_unique( Args&& ...args ) - { - return std::unique_ptr( new T( std::forward(args)... ) ); - } -} -#endif -- GitLab From 5910a5e8cae4d2f05ae6868e875afaf416a888d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 21 Aug 2019 08:58:04 +0200 Subject: [PATCH 06/35] Reimplemented getType() function using typeid operator and removed useless getType() methods Fixes #46 --- .../Examples/ObjectExample_getType.cpp | 27 +-- src/Benchmarks/BLAS/spmv.h | 2 +- .../HeatEquation/BenchmarkLaplace.h | 6 - .../HeatEquation/BenchmarkLaplace_impl.h | 45 ----- .../HeatEquationBenchmarkProblem.h | 2 - .../HeatEquationBenchmarkProblem_impl.h | 12 -- src/Benchmarks/HeatEquation/tnlTestGrid2D.h | 56 +----- src/Benchmarks/ODESolvers/Euler.h | 2 - src/Benchmarks/ODESolvers/Euler.hpp | 8 - src/Benchmarks/ODESolvers/Merson.h | 2 - src/Benchmarks/ODESolvers/Merson.hpp | 8 - src/Examples/flow-sw/LaxFridrichsContinuity.h | 8 - src/Examples/flow-sw/LaxFridrichsEnergy.h | 8 - src/Examples/flow-sw/LaxFridrichsMomentumX.h | 25 --- src/Examples/flow-sw/LaxFridrichsMomentumY.h | 25 --- src/Examples/flow-sw/LaxFridrichsMomentumZ.h | 25 --- src/Examples/flow-sw/UpwindContinuity.h | 8 - src/Examples/flow-sw/UpwindEnergy.h | 8 - 
src/Examples/flow-sw/UpwindMomentumX.h | 25 --- src/Examples/flow-sw/UpwindMomentumY.h | 25 --- src/Examples/flow-sw/UpwindMomentumZ.h | 25 --- src/Examples/flow-sw/navierStokesProblem.h | 2 - .../flow-sw/navierStokesProblem_impl.h | 12 -- src/Examples/flow-vl/LaxFridrichsContinuity.h | 8 - src/Examples/flow-vl/LaxFridrichsEnergy.h | 8 - src/Examples/flow-vl/LaxFridrichsMomentumX.h | 25 --- src/Examples/flow-vl/LaxFridrichsMomentumY.h | 25 --- src/Examples/flow-vl/LaxFridrichsMomentumZ.h | 25 --- src/Examples/flow-vl/UpwindContinuity.h | 8 - src/Examples/flow-vl/UpwindEnergy.h | 8 - src/Examples/flow-vl/UpwindMomentumX.h | 25 --- src/Examples/flow-vl/UpwindMomentumY.h | 25 --- src/Examples/flow-vl/UpwindMomentumZ.h | 25 --- src/Examples/flow-vl/navierStokesProblem.h | 2 - .../flow-vl/navierStokesProblem_impl.h | 12 -- src/Examples/flow/LaxFridrichsContinuity.h | 8 - .../flow/LaxFridrichsContinuityEuler.h | 8 - src/Examples/flow/LaxFridrichsEnergy.h | 8 - src/Examples/flow/LaxFridrichsEnergyEuler.h | 8 - src/Examples/flow/LaxFridrichsMomentumX.h | 25 --- .../flow/LaxFridrichsMomentumXEuler.h | 25 --- src/Examples/flow/LaxFridrichsMomentumY.h | 25 --- .../flow/LaxFridrichsMomentumYEuler.h | 25 --- src/Examples/flow/LaxFridrichsMomentumZ.h | 25 --- .../flow/LaxFridrichsMomentumZEuler.h | 25 --- src/Examples/flow/navierStokesProblem.h | 2 - src/Examples/flow/navierStokesProblem_impl.h | 12 -- .../inviscid-flow-sw/LaxFridrichsContinuity.h | 8 - .../inviscid-flow-sw/LaxFridrichsEnergy.h | 8 - .../inviscid-flow-sw/LaxFridrichsMomentumX.h | 25 --- .../inviscid-flow-sw/LaxFridrichsMomentumY.h | 25 --- .../inviscid-flow-sw/LaxFridrichsMomentumZ.h | 25 --- .../inviscid-flow-sw/UpwindContinuity.h | 8 - src/Examples/inviscid-flow-sw/UpwindEnergy.h | 8 - .../inviscid-flow-sw/UpwindMomentumX.h | 25 --- .../inviscid-flow-sw/UpwindMomentumY.h | 25 --- .../inviscid-flow-sw/UpwindMomentumZ.h | 25 --- src/Examples/inviscid-flow-sw/eulerProblem.h | 2 - 
.../inviscid-flow-sw/eulerProblem_impl.h | 12 -- .../inviscid-flow-vl/LaxFridrichsContinuity.h | 8 - .../inviscid-flow-vl/LaxFridrichsEnergy.h | 8 - .../inviscid-flow-vl/LaxFridrichsMomentumX.h | 25 --- .../inviscid-flow-vl/LaxFridrichsMomentumY.h | 25 --- .../inviscid-flow-vl/LaxFridrichsMomentumZ.h | 25 --- .../inviscid-flow-vl/UpwindContinuity.h | 8 - src/Examples/inviscid-flow-vl/UpwindEnergy.h | 8 - .../inviscid-flow-vl/UpwindMomentumX.h | 25 --- .../inviscid-flow-vl/UpwindMomentumY.h | 25 --- .../inviscid-flow-vl/UpwindMomentumZ.h | 25 --- src/Examples/inviscid-flow-vl/eulerProblem.h | 2 - .../inviscid-flow-vl/eulerProblem_impl.h | 12 -- .../inviscid-flow/2d/Euler2DVelXGetter.h | 2 - .../inviscid-flow/3d/Euler2DVelXGetter.h | 2 - .../inviscid-flow/3d/EulerPressureGetter.h | 2 - .../inviscid-flow/3d/EulerVelGetter.h | 2 - .../3d/LaxFridrichsContinuity_impl .h | 45 ----- .../inviscid-flow/3d/LaxFridrichsEnergy.h | 3 - .../inviscid-flow/3d/LaxFridrichsMomentumX.h | 3 - .../inviscid-flow/3d/LaxFridrichsMomentumZ.h | 3 - src/Examples/inviscid-flow/3d/eulerProblem.h | 2 - .../inviscid-flow/3d/eulerProblem_impl.h | 11 -- .../inviscid-flow/LaxFridrichsContinuity.h | 8 - .../inviscid-flow/LaxFridrichsEnergy.h | 8 - .../inviscid-flow/LaxFridrichsMomentumX.h | 25 --- .../inviscid-flow/LaxFridrichsMomentumY.h | 25 --- .../inviscid-flow/LaxFridrichsMomentumZ.h | 25 --- src/Examples/inviscid-flow/eulerProblem.h | 2 - .../inviscid-flow/eulerProblem_impl.h | 12 -- .../navier-stokes/navierStokesSetter_impl.h | 2 +- .../navier-stokes/navierStokesSolver.h | 2 - .../navier-stokes/navierStokesSolver_impl.h | 7 - .../transportEquationProblem.h | 2 - .../transportEquationProblemEoc.h | 2 - .../transportEquationProblemEoc_impl.h | 12 -- .../transportEquationProblem_impl.h | 12 -- src/Python/pytnl/tnl/Array.h | 2 - src/Python/pytnl/tnl/Grid.h | 2 - src/Python/pytnl/tnl/Mesh.h | 2 - src/Python/pytnl/tnl/SparseMatrix.h | 2 - src/Python/pytnl/tnl/StaticVector.h | 1 - 
src/Python/pytnl/tnl/String.cpp | 1 - src/Python/pytnl/tnl/Vector.h | 2 - src/TNL/Atomic.h | 17 -- src/TNL/Config/ConfigDescription.h | 1 - src/TNL/Config/ConfigEntry.h | 1 + src/TNL/Config/ConfigEntryList.h | 1 + src/TNL/Config/ParameterContainer.h | 3 +- src/TNL/Config/parseCommandLine.h | 1 - src/TNL/Containers/Algorithms/ArrayIO.h | 12 +- src/TNL/Containers/Array.h | 10 -- src/TNL/Containers/Array.hpp | 27 +-- src/TNL/Containers/ArrayView.h | 5 - src/TNL/Containers/ArrayView.hpp | 15 +- src/TNL/Containers/DistributedArray.h | 7 - src/TNL/Containers/DistributedArray.hpp | 27 --- src/TNL/Containers/DistributedArrayView.h | 3 - src/TNL/Containers/DistributedArrayView.hpp | 17 -- src/TNL/Containers/DistributedVector.h | 5 - src/TNL/Containers/DistributedVector.hpp | 28 --- src/TNL/Containers/DistributedVectorView.h | 2 - src/TNL/Containers/DistributedVectorView.hpp | 16 -- src/TNL/Containers/List.h | 5 +- src/TNL/Containers/List_impl.h | 6 - .../Multimaps/EllpackIndexMultimap.h | 4 - .../Multimaps/EllpackIndexMultimap.hpp | 28 --- .../Multimaps/StaticEllpackIndexMultimap.h | 4 - .../Multimaps/StaticEllpackIndexMultimap.hpp | 30 ---- src/TNL/Containers/StaticArray.h | 4 - src/TNL/Containers/StaticArray.hpp | 12 +- src/TNL/Containers/StaticVector.h | 5 - src/TNL/Containers/StaticVector.hpp | 10 -- src/TNL/Containers/Subrange.h | 9 +- src/TNL/Containers/Vector.h | 10 -- src/TNL/Containers/Vector.hpp | 25 --- src/TNL/Containers/VectorView.h | 5 - src/TNL/Containers/VectorView.hpp | 13 -- src/TNL/Devices/Cuda.h | 2 - src/TNL/Devices/Cuda_impl.h | 5 - src/TNL/Devices/Host.h | 5 - src/TNL/Experimental/Arithmetics/Quad.h | 2 - src/TNL/Experimental/Arithmetics/Quad_impl.h | 8 - src/TNL/Functions/Analytic/Blob.h | 6 - src/TNL/Functions/Analytic/Blob_impl.h | 21 --- src/TNL/Functions/Analytic/Cylinder.h | 6 - src/TNL/Functions/Analytic/Cylinder_impl.h | 22 --- src/TNL/Functions/Analytic/ExpBump.h | 7 - src/TNL/Functions/Analytic/ExpBump_impl.h | 21 --- 
src/TNL/Functions/Analytic/Flowerpot.h | 6 - src/TNL/Functions/Analytic/Flowerpot_impl.h | 21 --- src/TNL/Functions/Analytic/PseudoSquare.h | 6 - .../Functions/Analytic/PseudoSquare_impl.h | 21 --- src/TNL/Functions/Analytic/Twins.h | 6 - src/TNL/Functions/Analytic/Twins_impl.h | 21 --- src/TNL/Functions/Analytic/VectorNorm.h | 6 - src/TNL/Functions/MeshFunction.h | 4 - src/TNL/Functions/MeshFunction_impl.h | 32 +--- src/TNL/Functions/VectorField.h | 13 -- .../Functions/VectorFieldGnuplotWriter_impl.h | 2 +- src/TNL/Images/DicomSeries.h | 7 +- src/TNL/Logger.h | 1 + src/TNL/Matrices/AdEllpack.h | 4 - src/TNL/Matrices/AdEllpack_impl.h | 22 +-- src/TNL/Matrices/BiEllpack.h | 4 - src/TNL/Matrices/BiEllpackSymmetric.h | 4 - src/TNL/Matrices/BiEllpackSymmetric_impl.h | 22 --- src/TNL/Matrices/BiEllpack_impl.h | 22 --- src/TNL/Matrices/COOMatrix.h | 4 - src/TNL/Matrices/COOMatrix_impl.h | 22 +-- src/TNL/Matrices/CSR.h | 4 - src/TNL/Matrices/CSR_impl.h | 22 +-- src/TNL/Matrices/ChunkedEllpack.h | 7 - src/TNL/Matrices/ChunkedEllpack_impl.h | 26 +-- src/TNL/Matrices/Dense.h | 4 - src/TNL/Matrices/Dense_impl.h | 24 +-- src/TNL/Matrices/DistributedMatrix.h | 7 - src/TNL/Matrices/DistributedMatrix_impl.h | 22 --- src/TNL/Matrices/Ellpack.h | 4 - src/TNL/Matrices/EllpackSymmetric.h | 4 - src/TNL/Matrices/EllpackSymmetricGraph.h | 4 - src/TNL/Matrices/EllpackSymmetricGraph_impl.h | 20 --- src/TNL/Matrices/EllpackSymmetric_impl.h | 20 --- src/TNL/Matrices/Ellpack_impl.h | 26 +-- src/TNL/Matrices/Multidiagonal.h | 4 - src/TNL/Matrices/Multidiagonal_impl.h | 22 +-- src/TNL/Matrices/SlicedEllpack.h | 4 - src/TNL/Matrices/SlicedEllpackSymmetric.h | 4 - .../Matrices/SlicedEllpackSymmetricGraph.h | 4 - .../SlicedEllpackSymmetricGraph_impl.h | 22 --- .../Matrices/SlicedEllpackSymmetric_impl.h | 22 --- src/TNL/Matrices/SlicedEllpack_impl.h | 28 +-- src/TNL/Matrices/Tridiagonal.h | 4 - src/TNL/Matrices/Tridiagonal_impl.h | 24 +-- src/TNL/Meshes/DefaultConfig.h | 13 +- 
src/TNL/Meshes/GridDetails/Grid1D.h | 10 -- src/TNL/Meshes/GridDetails/Grid1D_impl.h | 26 +-- src/TNL/Meshes/GridDetails/Grid2D.h | 10 -- src/TNL/Meshes/GridDetails/Grid2D_impl.h | 24 +-- src/TNL/Meshes/GridDetails/Grid3D.h | 10 -- src/TNL/Meshes/GridDetails/Grid3D_impl.h | 24 +-- src/TNL/Meshes/Mesh.h | 4 - .../MeshEntityReferenceOrientation.h | 7 +- src/TNL/Meshes/MeshDetails/MeshEntity_impl.h | 22 +-- src/TNL/Meshes/MeshDetails/Mesh_impl.h | 18 +- .../initializer/EntityInitializer.h | 4 - .../MeshDetails/initializer/EntitySeed.h | 4 - src/TNL/Meshes/MeshEntity.h | 8 +- src/TNL/Meshes/Topologies/Edge.h | 7 +- src/TNL/Meshes/Topologies/Hexahedron.h | 5 - src/TNL/Meshes/Topologies/Quadrilateral.h | 5 - src/TNL/Meshes/Topologies/Simplex.h | 10 +- src/TNL/Meshes/Topologies/Tetrahedron.h | 5 - src/TNL/Meshes/Topologies/Triangle.h | 5 - src/TNL/Meshes/Topologies/Vertex.h | 5 - .../TypeResolver/MeshTypeResolver_impl.h | 4 +- .../Meshes/TypeResolver/TypeResolver_impl.h | 2 +- src/TNL/Object.h | 27 +-- src/TNL/Object.hpp | 10 -- src/TNL/Operators/Advection/LaxFridrichs.h | 6 - src/TNL/Operators/Advection/Upwind.h | 6 - .../Operators/ExactFunctionInverseOperator.h | 6 - src/TNL/Operators/ExactIdentityOperator.h | 6 - src/TNL/Operators/FunctionInverseOperator.h | 5 - .../diffusion/ExactLinearDiffusion.h | 6 - .../diffusion/ExactLinearDiffusion_impl.h | 21 --- .../Operators/diffusion/ExactMeanCurvature.h | 7 - .../diffusion/ExactNonlinearDiffusion.h | 15 -- .../diffusion/FiniteVolumeNonlinearOperator.h | 6 - .../FiniteVolumeNonlinearOperator_impl.h | 51 ------ src/TNL/Operators/diffusion/LinearDiffusion.h | 6 - .../diffusion/LinearDiffusion_impl.h | 46 ----- .../diffusion/NonlinearDiffusion_impl.h | 51 ------ .../diffusion/OneSidedMeanCurvature.h | 8 - .../diffusion/OneSidedNonlinearDiffusion.h | 27 --- src/TNL/Operators/euler/fvm/LaxFridrichs.h | 2 - .../Operators/euler/fvm/LaxFridrichs_impl.h | 13 -- .../Operators/fdm/BackwardFiniteDifference.h | 11 -- 
.../Operators/fdm/CentralFiniteDifference.h | 12 -- src/TNL/Operators/fdm/ExactDifference.h | 9 - .../Operators/fdm/ForwardFiniteDifference.h | 12 -- .../Operators/geometric/CoFVMGradientNorm.h | 35 ---- .../Operators/geometric/ExactGradientNorm.h | 15 -- src/TNL/Operators/geometric/FDMGradientNorm.h | 26 --- .../geometric/TwoSidedGradientNorm.h | 26 --- .../operator-Q/tnlFiniteVolumeOperatorQ.h | 12 -- .../tnlFiniteVolumeOperatorQ_impl.h | 90 ---------- .../operator-Q/tnlOneSideDiffOperatorQ.h | 6 - .../operator-Q/tnlOneSideDiffOperatorQ_impl.h | 45 ----- .../ExactOperatorCurvature.h | 6 - .../ExactOperatorCurvature_impl.h | 24 --- src/TNL/Pointers/DevicePointer.h | 5 +- src/TNL/Pointers/SharedPointer.h | 22 +-- src/TNL/Pointers/SharedPointerCuda.h | 41 ++--- src/TNL/Pointers/SharedPointerHost.h | 4 +- src/TNL/Pointers/UniquePointer.h | 2 +- src/TNL/Problems/HeatEquationEocProblem.h | 2 - .../Problems/HeatEquationEocProblem_impl.h | 14 +- src/TNL/Problems/HeatEquationProblem.h | 2 - src/TNL/Problems/HeatEquationProblem_impl.h | 12 -- .../Problems/MeanCurvatureFlowEocProblem.h | 2 - .../MeanCurvatureFlowEocProblem_impl.h | 13 +- src/TNL/Problems/MeanCurvatureFlowProblem.h | 2 - .../Problems/MeanCurvatureFlowProblem_impl.h | 11 -- src/TNL/Problems/PDEProblem.h | 2 - src/TNL/Problems/PDEProblem_impl.h | 16 -- .../cfd/navier-stokes/NavierStokesSolver.h | 4 +- .../navier-stokes/NavierStokesSolver_impl.h | 10 -- src/TNL/Solvers/Linear/BICGStab.h | 2 - src/TNL/Solvers/Linear/BICGStabL.h | 2 - src/TNL/Solvers/Linear/BICGStabL_impl.h | 10 -- src/TNL/Solvers/Linear/BICGStab_impl.h | 8 - src/TNL/Solvers/Linear/CG.h | 2 - src/TNL/Solvers/Linear/CG_impl.h | 8 - src/TNL/Solvers/Linear/GMRES.h | 2 - src/TNL/Solvers/Linear/GMRES_impl.h | 10 -- src/TNL/Solvers/Linear/Jacobi.h | 5 - .../Solvers/Linear/Preconditioners/Diagonal.h | 10 -- src/TNL/Solvers/Linear/Preconditioners/ILU0.h | 8 +- src/TNL/Solvers/Linear/Preconditioners/ILUT.h | 5 - .../Linear/Preconditioners/Preconditioner.h 
| 5 - src/TNL/Solvers/Linear/SOR.h | 2 - src/TNL/Solvers/Linear/SOR_impl.h | 8 - src/TNL/Solvers/Linear/TFQMR.h | 2 - src/TNL/Solvers/Linear/TFQMR_impl.h | 8 - src/TNL/Solvers/Linear/UmfpackWrapper.h | 2 - src/TNL/Solvers/ODE/Euler.h | 2 - src/TNL/Solvers/ODE/Euler.hpp | 8 - src/TNL/Solvers/ODE/Merson.h | 2 - src/TNL/Solvers/ODE/Merson_impl.h | 8 - src/TNL/Solvers/PDE/ExplicitTimeStepper.h | 2 - .../Solvers/PDE/ExplicitTimeStepper_impl.h | 17 +- src/TNL/String.h | 5 - src/TNL/String.hpp | 5 - src/TNL/TypeInfo.h | 107 ++++++++++++ src/TNL/TypeTraits.h | 28 ++- src/TNL/param-types.h | 91 ---------- src/Tools/tnl-lattice-init.h | 4 +- .../operator-grid-specialization.h.in | 2 - .../operator-grid-specialization_impl.h.in | 15 -- src/Tools/tnl-quickstart/problem.h.in | 2 - src/Tools/tnl-quickstart/problem_impl.h.in | 12 -- src/Tools/tnl-view.h | 4 +- src/UnitTests/CMakeLists.txt | 5 + src/UnitTests/Containers/ArrayTest.h | 5 - src/UnitTests/Containers/ArrayViewTest.h | 5 - src/UnitTests/Containers/ListTest.cpp | 5 - src/UnitTests/Meshes/BoundaryTagsTest.h | 3 +- src/UnitTests/Meshes/MeshEntityTest.h | 18 +- src/UnitTests/Meshes/MeshTest.h | 15 +- src/UnitTests/TypeInfoTest.cpp | 165 ++++++++++++++++++ 309 files changed, 468 insertions(+), 3652 deletions(-) create mode 100644 src/TNL/TypeInfo.h delete mode 100644 src/TNL/param-types.h create mode 100644 src/UnitTests/TypeInfoTest.cpp diff --git a/Documentation/Examples/ObjectExample_getType.cpp b/Documentation/Examples/ObjectExample_getType.cpp index 7cc7476d6..7c45a167d 100644 --- a/Documentation/Examples/ObjectExample_getType.cpp +++ b/Documentation/Examples/ObjectExample_getType.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -13,24 +13,12 @@ class MyArray : public Object { public: - using HostType = MyArray< Value, Devices::Host >; - - static String getType() - { - return "MyArray< " + TNL::getType< Value >() + ", " + TNL::getType< Device >() + " >"; - } - - String getTypeVirtual() 
const - { - return getType(); - } - static String getSerializationType() { - return HostType::getType(); + return "MyArray< " + TNL::getType< Value >() + ", " + getType< Devices::Host >() + " >"; } - String getSerializationTypeVirtual() const + virtual String getSerializationTypeVirtual() const override { return getSerializationType(); } @@ -47,11 +35,11 @@ int main() Object* cudaArrayPtr = &cudaArray; // Object types - cout << "HostArray type is " << HostArray::getType() << endl; - cout << "hostArrayPtr type is " << hostArrayPtr->getTypeVirtual() << endl; + cout << "HostArray type is " << getType< HostArray >() << endl; + cout << "hostArrayPtr type is " << getType( *hostArrayPtr ) << endl; - cout << "CudaArray type is " << CudaArray::getType() << endl; - cout << "cudaArrayPtr type is " << cudaArrayPtr->getTypeVirtual() << endl; + cout << "CudaArray type is " << getType< CudaArray >() << endl; + cout << "cudaArrayPtr type is " << getType( *cudaArrayPtr ) << endl; // Object serialization types cout << "HostArray serialization type is " << HostArray::getSerializationType() << endl; @@ -60,4 +48,3 @@ int main() cout << "CudaArray serialization type is " << CudaArray::getSerializationType() << endl; cout << "cudaArrayPtr serialization type is " << cudaArrayPtr->getSerializationTypeVirtual() << endl; } - diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h index b6c91a247..9fe469509 100644 --- a/src/Benchmarks/BLAS/spmv.h +++ b/src/Benchmarks/BLAS/spmv.h @@ -109,7 +109,7 @@ benchmarkSpMV( Benchmark & benchmark, CudaVector deviceVector, deviceVector2; // create benchmark group - const std::vector< String > parsedType = parseObjectType( HostMatrix::getType() ); + const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() ); #ifdef HAVE_CUDA benchmark.createHorizontalGroup( parsedType[ 0 ], 2 ); #else diff --git a/src/Benchmarks/HeatEquation/BenchmarkLaplace.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h index 0a5494e2b..0c2fd92e3 
100644 --- a/src/Benchmarks/HeatEquation/BenchmarkLaplace.h +++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h @@ -33,8 +33,6 @@ class BenchmarkLaplace< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, In typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimension = MeshType::getMeshDimension() }; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -81,8 +79,6 @@ class BenchmarkLaplace< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, Ind typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimension = MeshType::getMeshDimension() }; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -144,8 +140,6 @@ class BenchmarkLaplace< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Ind typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimension = MeshType::getMeshDimension() }; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h index 34a2e245a..47a677441 100644 --- a/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h +++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h @@ -4,21 +4,6 @@ /**** * 1D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -BenchmarkLaplace< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "BenchmarkLaplace< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -108,21 +93,6 @@ setMatrixElements( const RealType& time, /**** * 2D problem */ 
-template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -BenchmarkLaplace< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "BenchmarkLaplace< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -288,21 +258,6 @@ setMatrixElements( const RealType& time, /**** * 3D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -BenchmarkLaplace< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "BenchmarkLaplace< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h index 998be646d..95491a1cf 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h @@ -40,8 +40,6 @@ class HeatEquationBenchmarkProblem: using typename BaseType::DofVectorPointer; HeatEquationBenchmarkProblem(); - - static String getType(); String getPrologHeader() const; diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h index 14f1fd8a9..53cd0ec36 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h @@ -16,18 +16,6 @@ -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator, - typename Communicator > -String -HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, 
Communicator >:: -getType() -{ - return String( "HeatEquationBenchmarkProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h index 084d6cc39..a17e29c12 100644 --- a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h +++ b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h @@ -78,10 +78,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject Grid(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; @@ -212,29 +208,13 @@ Meshes::Grid< 2, Real, Device, Index > :: Grid() template< typename Real, typename Device, typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getType() +String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( ::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( ::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, @@ -854,10 +834,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject Grid(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; @@ -977,29 +953,13 @@ Meshes::Grid< 2, Real, Device, Index > :: Grid() template< typename Real, typename Device, 
typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getType() +String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( ::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( ::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/Benchmarks/ODESolvers/Euler.h b/src/Benchmarks/ODESolvers/Euler.h index c767eb33e..2df469d6f 100644 --- a/src/Benchmarks/ODESolvers/Euler.h +++ b/src/Benchmarks/ODESolvers/Euler.h @@ -38,8 +38,6 @@ class Euler : public Solvers::ODE::ExplicitSolver< Problem, SolverMonitor > Euler(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/Benchmarks/ODESolvers/Euler.hpp b/src/Benchmarks/ODESolvers/Euler.hpp index efb336aca..22c013041 100644 --- a/src/Benchmarks/ODESolvers/Euler.hpp +++ b/src/Benchmarks/ODESolvers/Euler.hpp @@ -32,14 +32,6 @@ Euler< Problem, SolverMonitor >::Euler() { }; -template< typename Problem, typename SolverMonitor > -String Euler< Problem, SolverMonitor >::getType() -{ - return String( "Euler< " ) + - Problem :: getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Euler< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config, const String& prefix ) diff --git a/src/Benchmarks/ODESolvers/Merson.h b/src/Benchmarks/ODESolvers/Merson.h index 8d00667c2..74e052705 
100644 --- a/src/Benchmarks/ODESolvers/Merson.h +++ b/src/Benchmarks/ODESolvers/Merson.h @@ -35,8 +35,6 @@ class Merson : public Solvers::ODE::ExplicitSolver< Problem, SolverMonitor > Merson(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/Benchmarks/ODESolvers/Merson.hpp b/src/Benchmarks/ODESolvers/Merson.hpp index 3a5cdf660..1ea606c4c 100644 --- a/src/Benchmarks/ODESolvers/Merson.hpp +++ b/src/Benchmarks/ODESolvers/Merson.hpp @@ -94,14 +94,6 @@ Merson< Problem, SolverMonitor >::Merson() } }; -template< typename Problem, typename SolverMonitor > -String Merson< Problem, SolverMonitor >::getType() -{ - return String( "Merson< " ) + - Problem::getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Merson< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config, const String& prefix ) diff --git a/src/Examples/flow-sw/LaxFridrichsContinuity.h b/src/Examples/flow-sw/LaxFridrichsContinuity.h index 82747cd18..4195913b6 100644 --- a/src/Examples/flow-sw/LaxFridrichsContinuity.h +++ b/src/Examples/flow-sw/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow-sw/LaxFridrichsEnergy.h b/src/Examples/flow-sw/LaxFridrichsEnergy.h index 03019ed23..df7828be3 100644 --- a/src/Examples/flow-sw/LaxFridrichsEnergy.h +++ b/src/Examples/flow-sw/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< 
Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumX.h b/src/Examples/flow-sw/LaxFridrichsMomentumX.h index 63def12d3..b1877a2c4 100644 --- a/src/Examples/flow-sw/LaxFridrichsMomentumX.h +++ b/src/Examples/flow-sw/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumY.h b/src/Examples/flow-sw/LaxFridrichsMomentumY.h index 8ce42282d..b4fe75691 100644 --- a/src/Examples/flow-sw/LaxFridrichsMomentumY.h +++ b/src/Examples/flow-sw/LaxFridrichsMomentumY.h @@ -46,15 +46,6 
@@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h index a67e862ce..fe8539595 100644 --- a/src/Examples/flow-sw/LaxFridrichsMomentumZ.h +++ b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< 
Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/UpwindContinuity.h b/src/Examples/flow-sw/UpwindContinuity.h index fc599d3d9..d016cff6b 100644 --- a/src/Examples/flow-sw/UpwindContinuity.h +++ b/src/Examples/flow-sw/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-sw/UpwindEnergy.h b/src/Examples/flow-sw/UpwindEnergy.h index 6c7e94ec8..8023631ba 100644 --- a/src/Examples/flow-sw/UpwindEnergy.h +++ b/src/Examples/flow-sw/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : 
artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-sw/UpwindMomentumX.h b/src/Examples/flow-sw/UpwindMomentumX.h index edd375620..939e4f555 100644 --- a/src/Examples/flow-sw/UpwindMomentumX.h +++ b/src/Examples/flow-sw/UpwindMomentumX.h @@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -136,14 +127,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -268,14 +251,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/UpwindMomentumY.h b/src/Examples/flow-sw/UpwindMomentumY.h index 4b5a7bcb2..7a4d3d050 
100644 --- a/src/Examples/flow-sw/UpwindMomentumY.h +++ b/src/Examples/flow-sw/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -238,14 +221,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/UpwindMomentumZ.h b/src/Examples/flow-sw/UpwindMomentumZ.h index 887eec977..c42588757 100644 --- a/src/Examples/flow-sw/UpwindMomentumZ.h +++ b/src/Examples/flow-sw/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( 
"UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/navierStokesProblem.h b/src/Examples/flow-sw/navierStokesProblem.h index 0252a5c46..0e79d19df 100644 --- a/src/Examples/flow-sw/navierStokesProblem.h +++ b/src/Examples/flow-sw/navierStokesProblem.h @@ -55,8 +55,6 @@ class navierStokesProblem: typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer; typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - - static String getTypeStatic(); String getPrologHeader() const; diff --git a/src/Examples/flow-sw/navierStokesProblem_impl.h b/src/Examples/flow-sw/navierStokesProblem_impl.h index 886c9f03f..96bdb4827 100644 --- a/src/Examples/flow-sw/navierStokesProblem_impl.h +++ b/src/Examples/flow-sw/navierStokesProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename 
Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/flow-vl/LaxFridrichsContinuity.h b/src/Examples/flow-vl/LaxFridrichsContinuity.h index 82747cd18..4195913b6 100644 --- a/src/Examples/flow-vl/LaxFridrichsContinuity.h +++ b/src/Examples/flow-vl/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow-vl/LaxFridrichsEnergy.h b/src/Examples/flow-vl/LaxFridrichsEnergy.h index 03019ed23..df7828be3 100644 --- a/src/Examples/flow-vl/LaxFridrichsEnergy.h +++ b/src/Examples/flow-vl/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumX.h b/src/Examples/flow-vl/LaxFridrichsMomentumX.h index 63def12d3..b1877a2c4 100644 --- a/src/Examples/flow-vl/LaxFridrichsMomentumX.h +++ b/src/Examples/flow-vl/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - 
static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumY.h b/src/Examples/flow-vl/LaxFridrichsMomentumY.h index 8ce42282d..b4fe75691 100644 --- a/src/Examples/flow-vl/LaxFridrichsMomentumY.h +++ b/src/Examples/flow-vl/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, 
Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h index a67e862ce..fe8539595 100644 --- a/src/Examples/flow-vl/LaxFridrichsMomentumZ.h +++ b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } 
template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/UpwindContinuity.h b/src/Examples/flow-vl/UpwindContinuity.h index 20bae4fbb..fff04e9bb 100644 --- a/src/Examples/flow-vl/UpwindContinuity.h +++ b/src/Examples/flow-vl/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-vl/UpwindEnergy.h b/src/Examples/flow-vl/UpwindEnergy.h index 8fa7a046a..b4570e608 100644 --- a/src/Examples/flow-vl/UpwindEnergy.h +++ b/src/Examples/flow-vl/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-vl/UpwindMomentumX.h b/src/Examples/flow-vl/UpwindMomentumX.h index edd375620..939e4f555 100644 --- a/src/Examples/flow-vl/UpwindMomentumX.h +++ b/src/Examples/flow-vl/UpwindMomentumX.h @@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, 
Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -136,14 +127,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -268,14 +251,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/UpwindMomentumY.h b/src/Examples/flow-vl/UpwindMomentumY.h index 4b5a7bcb2..7a4d3d050 100644 --- a/src/Examples/flow-vl/UpwindMomentumY.h +++ b/src/Examples/flow-vl/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename 
MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -238,14 +221,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/UpwindMomentumZ.h b/src/Examples/flow-vl/UpwindMomentumZ.h index 887eec977..c42588757 100644 --- a/src/Examples/flow-vl/UpwindMomentumZ.h +++ b/src/Examples/flow-vl/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< 
Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/navierStokesProblem.h b/src/Examples/flow-vl/navierStokesProblem.h index 51cc5f014..dbac46e74 100644 --- a/src/Examples/flow-vl/navierStokesProblem.h +++ b/src/Examples/flow-vl/navierStokesProblem.h @@ -57,8 +57,6 @@ class navierStokesProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/flow-vl/navierStokesProblem_impl.h b/src/Examples/flow-vl/navierStokesProblem_impl.h index 886c9f03f..96bdb4827 100644 --- a/src/Examples/flow-vl/navierStokesProblem_impl.h +++ b/src/Examples/flow-vl/navierStokesProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/flow/LaxFridrichsContinuity.h b/src/Examples/flow/LaxFridrichsContinuity.h index bf3cc45ec..8a9d22c6a 100644 --- a/src/Examples/flow/LaxFridrichsContinuity.h +++ 
b/src/Examples/flow/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow/LaxFridrichsContinuityEuler.h b/src/Examples/flow/LaxFridrichsContinuityEuler.h index f444a4e25..ce175d807 100644 --- a/src/Examples/flow/LaxFridrichsContinuityEuler.h +++ b/src/Examples/flow/LaxFridrichsContinuityEuler.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow/LaxFridrichsEnergy.h b/src/Examples/flow/LaxFridrichsEnergy.h index dd940243d..630a985fe 100644 --- a/src/Examples/flow/LaxFridrichsEnergy.h +++ b/src/Examples/flow/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow/LaxFridrichsEnergyEuler.h b/src/Examples/flow/LaxFridrichsEnergyEuler.h index 30180639d..37cd793a7 100644 --- a/src/Examples/flow/LaxFridrichsEnergyEuler.h +++ b/src/Examples/flow/LaxFridrichsEnergyEuler.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< 
Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow/LaxFridrichsMomentumX.h b/src/Examples/flow/LaxFridrichsMomentumX.h index 3e295c029..8fe02be40 100644 --- a/src/Examples/flow/LaxFridrichsMomentumX.h +++ b/src/Examples/flow/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -128,14 +119,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -242,14 +225,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumXEuler.h b/src/Examples/flow/LaxFridrichsMomentumXEuler.h index 63def12d3..b1877a2c4 100644 --- a/src/Examples/flow/LaxFridrichsMomentumXEuler.h +++ b/src/Examples/flow/LaxFridrichsMomentumXEuler.h @@ -46,15 +46,6 @@ 
class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumY.h b/src/Examples/flow/LaxFridrichsMomentumY.h index 0df12c522..61c3e09dc 100644 --- a/src/Examples/flow/LaxFridrichsMomentumY.h +++ b/src/Examples/flow/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " 
+ - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -222,14 +205,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumYEuler.h b/src/Examples/flow/LaxFridrichsMomentumYEuler.h index 8ce42282d..b4fe75691 100644 --- a/src/Examples/flow/LaxFridrichsMomentumYEuler.h +++ b/src/Examples/flow/LaxFridrichsMomentumYEuler.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using 
BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumZ.h b/src/Examples/flow/LaxFridrichsMomentumZ.h index e4f8501ec..37056b5ae 100644 --- a/src/Examples/flow/LaxFridrichsMomentumZ.h +++ b/src/Examples/flow/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 
3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumZEuler.h b/src/Examples/flow/LaxFridrichsMomentumZEuler.h index a67e862ce..fe8539595 100644 --- a/src/Examples/flow/LaxFridrichsMomentumZEuler.h +++ b/src/Examples/flow/LaxFridrichsMomentumZEuler.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index 
>() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/navierStokesProblem.h b/src/Examples/flow/navierStokesProblem.h index f42c2ed09..71e8243cd 100644 --- a/src/Examples/flow/navierStokesProblem.h +++ b/src/Examples/flow/navierStokesProblem.h @@ -57,8 +57,6 @@ class navierStokesProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/flow/navierStokesProblem_impl.h b/src/Examples/flow/navierStokesProblem_impl.h index 4b0c79774..c2c84e7a6 100644 --- a/src/Examples/flow/navierStokesProblem_impl.h +++ b/src/Examples/flow/navierStokesProblem_impl.h @@ -42,18 +42,6 @@ */ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h index 82747cd18..4195913b6 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h 
b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h index 03019ed23..df7828be3 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h index 63def12d3..b1877a2c4 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static 
String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h index 8ce42282d..b4fe75691 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git 
a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h index a67e862ce..fe8539595 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/UpwindContinuity.h b/src/Examples/inviscid-flow-sw/UpwindContinuity.h index 22fc4ffc5..6a763635c 100644 --- a/src/Examples/inviscid-flow-sw/UpwindContinuity.h +++ b/src/Examples/inviscid-flow-sw/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase 
typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-sw/UpwindEnergy.h b/src/Examples/inviscid-flow-sw/UpwindEnergy.h index 39f609006..7472790db 100644 --- a/src/Examples/inviscid-flow-sw/UpwindEnergy.h +++ b/src/Examples/inviscid-flow-sw/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumX.h b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h index ed49dda94..1a887e7a5 100644 --- a/src/Examples/inviscid-flow-sw/UpwindMomentumX.h +++ b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h @@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -130,14 +121,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real 
>() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -236,14 +219,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumY.h b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h index c2126d43a..2ab8ffe82 100644 --- a/src/Examples/inviscid-flow-sw/UpwindMomentumY.h +++ b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -212,14 +195,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using 
BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h index 97339e804..fe8be0eb2 100644 --- a/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h +++ b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/eulerProblem.h 
b/src/Examples/inviscid-flow-sw/eulerProblem.h index a91e56176..5c10ab7fb 100644 --- a/src/Examples/inviscid-flow-sw/eulerProblem.h +++ b/src/Examples/inviscid-flow-sw/eulerProblem.h @@ -57,8 +57,6 @@ class eulerProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow-sw/eulerProblem_impl.h b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h index e0382e9c2..d4f119d4c 100644 --- a/src/Examples/inviscid-flow-sw/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h index 82747cd18..4195913b6 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h index 03019ed23..df7828be3 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h +++ 
b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h index 63def12d3..b1877a2c4 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< 
Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h index 8ce42282d..b4fe75691 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h index a67e862ce..fe8539595 100644 --- 
a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/UpwindContinuity.h b/src/Examples/inviscid-flow-vl/UpwindContinuity.h index 4a21cd502..3d60dfd9f 100644 --- a/src/Examples/inviscid-flow-vl/UpwindContinuity.h +++ b/src/Examples/inviscid-flow-vl/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - 
static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-vl/UpwindEnergy.h b/src/Examples/inviscid-flow-vl/UpwindEnergy.h index e3857cbcd..ce26148d7 100644 --- a/src/Examples/inviscid-flow-vl/UpwindEnergy.h +++ b/src/Examples/inviscid-flow-vl/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumX.h b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h index ed49dda94..cc7a01bc9 100644 --- a/src/Examples/inviscid-flow-vl/UpwindMomentumX.h +++ b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h @@ -47,15 +47,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -130,14 +121,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -236,14 +219,6 @@ class 
UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumY.h b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h index c2126d43a..2ab8ffe82 100644 --- a/src/Examples/inviscid-flow-vl/UpwindMomentumY.h +++ b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -212,14 +195,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - 
TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h index 97339e804..fe8be0eb2 100644 --- a/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h +++ b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/eulerProblem.h b/src/Examples/inviscid-flow-vl/eulerProblem.h index a91e56176..5c10ab7fb 100644 --- a/src/Examples/inviscid-flow-vl/eulerProblem.h +++ 
b/src/Examples/inviscid-flow-vl/eulerProblem.h @@ -57,8 +57,6 @@ class eulerProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow-vl/eulerProblem_impl.h b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h index e0382e9c2..d4f119d4c 100644 --- a/src/Examples/inviscid-flow-vl/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h index 2e79798a3..f87c91033 100644 --- a/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h +++ b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h @@ -21,8 +21,6 @@ class EulerVelXGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerVelXGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVel) : rho( rho ), rhoVel( rhoVel ) diff --git a/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h index 2e79798a3..f87c91033 100644 --- a/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h +++ b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h @@ -21,8 +21,6 @@ class EulerVelXGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String 
getType(); - EulerVelXGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVel) : rho( rho ), rhoVel( rhoVel ) diff --git a/src/Examples/inviscid-flow/3d/EulerPressureGetter.h b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h index 45611c647..5a39ca84d 100644 --- a/src/Examples/inviscid-flow/3d/EulerPressureGetter.h +++ b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h @@ -22,8 +22,6 @@ class EulerPressureGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerPressureGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVelX, const MeshFunctionType& rhoVelY, diff --git a/src/Examples/inviscid-flow/3d/EulerVelGetter.h b/src/Examples/inviscid-flow/3d/EulerVelGetter.h index 24d06eaf5..82441fcae 100644 --- a/src/Examples/inviscid-flow/3d/EulerVelGetter.h +++ b/src/Examples/inviscid-flow/3d/EulerVelGetter.h @@ -21,8 +21,6 @@ class EulerVelGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerVelGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVelX, const MeshFunctionType& rhoVelY, diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h index ac469a524..840fc2680 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h @@ -6,21 +6,6 @@ namespace TNL { /**** * 1D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LaxFridrichsContinuity< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, 
typename Device, typename MeshIndex, @@ -109,21 +94,6 @@ updateLinearSystem( const RealType& time, /**** * 2D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LaxFridrichsContinuity< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -224,21 +194,6 @@ updateLinearSystem( const RealType& time, /**** * 3D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LaxFridrichsContinuity< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h index 9083970ae..9756f46c8 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h @@ -29,7 +29,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, I typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -101,7 +100,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, I typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -173,7 +171,6 @@ class 
LaxFridrichsEnergy< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, I typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h index 5d1cf9192..33e9c33ac 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h @@ -29,7 +29,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -101,7 +100,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -173,7 +171,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h index 0d7882f6d..63be36510 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h @@ -29,7 +29,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String 
getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -101,7 +100,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -173,7 +171,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; diff --git a/src/Examples/inviscid-flow/3d/eulerProblem.h b/src/Examples/inviscid-flow/3d/eulerProblem.h index d2ff1fc10..429c9d948 100644 --- a/src/Examples/inviscid-flow/3d/eulerProblem.h +++ b/src/Examples/inviscid-flow/3d/eulerProblem.h @@ -45,8 +45,6 @@ class eulerProblem: typedef typename DifferentialOperator::VelocityX VelocityX; typedef typename DifferentialOperator::Pressure Pressure; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow/3d/eulerProblem_impl.h b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h index b01979b87..10953ebe2 100644 --- a/src/Examples/inviscid-flow/3d/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h @@ -14,17 +14,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getTypeStatic() -{ - return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h index 
0ae10b4f9..93e52f04e 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h +++ b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h index 8c6791cd1..a9bb4148c 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h index 63def12d3..b1877a2c4 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using 
BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h index 8ce42282d..b4fe75691 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h +++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class 
LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h index a67e862ce..fe8539595 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - 
TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow/eulerProblem.h b/src/Examples/inviscid-flow/eulerProblem.h index a854f8098..dfc7be559 100644 --- a/src/Examples/inviscid-flow/eulerProblem.h +++ b/src/Examples/inviscid-flow/eulerProblem.h @@ -56,8 +56,6 @@ class eulerProblem: typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer; typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow/eulerProblem_impl.h b/src/Examples/inviscid-flow/eulerProblem_impl.h index fd64ae284..d203a16ad 100644 --- a/src/Examples/inviscid-flow/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow/eulerProblem_impl.h @@ -31,18 +31,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename InviscidOperators > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, InviscidOperators >:: -getType() -{ - return String( "eulerProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/navier-stokes/navierStokesSetter_impl.h b/src/Examples/navier-stokes/navierStokesSetter_impl.h index 5109284a6..a0369516c 100644 --- a/src/Examples/navier-stokes/navierStokesSetter_impl.h +++ b/src/Examples/navier-stokes/navierStokesSetter_impl.h @@ -29,7 +29,7 @@ template< typename MeshType, typename SolverStarter > typename IndexType > bool navierStokesSetter< MeshType, SolverStarter > :: run( const Config::ParameterContainer& parameters ) { - std::cerr << "The solver is not implemented for the mesh " << MeshType::getType() << "." 
<< std::endl; + std::cerr << "The solver is not implemented for the mesh " << getType< MeshType >() << "." << std::endl; return false; } diff --git a/src/Examples/navier-stokes/navierStokesSolver.h b/src/Examples/navier-stokes/navierStokesSolver.h index 262d9d480..c0166701a 100644 --- a/src/Examples/navier-stokes/navierStokesSolver.h +++ b/src/Examples/navier-stokes/navierStokesSolver.h @@ -55,8 +55,6 @@ class navierStokesSolver navierStokesSolver(); - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/navier-stokes/navierStokesSolver_impl.h b/src/Examples/navier-stokes/navierStokesSolver_impl.h index d4120d380..a42c7b317 100644 --- a/src/Examples/navier-stokes/navierStokesSolver_impl.h +++ b/src/Examples/navier-stokes/navierStokesSolver_impl.h @@ -285,13 +285,6 @@ SolverMonitor* return &solverMonitor; } -template< typename Mesh, typename EulerScheme > -String navierStokesSolver< Mesh, EulerScheme > :: getType() -{ - return String( "navierStokesSolver< " ) + - Mesh :: getType() + " >"; -} - template< typename Mesh, typename EulerScheme > String navierStokesSolver< Mesh, EulerScheme > :: getPrologHeader() const { diff --git a/src/Examples/transport-equation/transportEquationProblem.h b/src/Examples/transport-equation/transportEquationProblem.h index b6aa381d5..802100228 100644 --- a/src/Examples/transport-equation/transportEquationProblem.h +++ b/src/Examples/transport-equation/transportEquationProblem.h @@ -50,8 +50,6 @@ public PDEProblem< Mesh, using typename BaseType::MeshPointer; using typename BaseType::DofVectorType; using typename BaseType::DofVectorPointer; - - static String getType(); String getPrologHeader() const; diff --git a/src/Examples/transport-equation/transportEquationProblemEoc.h b/src/Examples/transport-equation/transportEquationProblemEoc.h index 62f10e273..279af6006 100644 --- a/src/Examples/transport-equation/transportEquationProblemEoc.h +++ 
b/src/Examples/transport-equation/transportEquationProblemEoc.h @@ -49,8 +49,6 @@ public transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communi using typename BaseType::DofVectorPointer; //using BaseType::getExplicitUpdate; - - static String getType(); String getPrologHeader() const; diff --git a/src/Examples/transport-equation/transportEquationProblemEoc_impl.h b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h index 0ac3af2d8..8de7eb9bc 100644 --- a/src/Examples/transport-equation/transportEquationProblemEoc_impl.h +++ b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h @@ -23,18 +23,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -transportEquationProblemEoc< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "transportEquationProblemEoc< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/transport-equation/transportEquationProblem_impl.h b/src/Examples/transport-equation/transportEquationProblem_impl.h index 7d83ceb09..96cf1a6ec 100644 --- a/src/Examples/transport-equation/transportEquationProblem_impl.h +++ b/src/Examples/transport-equation/transportEquationProblem_impl.h @@ -21,18 +21,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "transportEquationProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Python/pytnl/tnl/Array.h b/src/Python/pytnl/tnl/Array.h index acebce3d2..6e19878de 100644 --- 
a/src/Python/pytnl/tnl/Array.h +++ b/src/Python/pytnl/tnl/Array.h @@ -15,8 +15,6 @@ void export_Array(py::module & m, const char* name) auto array = py::class_(m, name, py::buffer_protocol()) .def(py::init<>()) .def(py::init()) - .def_static("getType", &ArrayType::getType) - .def("getTypeVirtual", &ArrayType::getTypeVirtual) .def_static("getSerializationType", &ArrayType::getSerializationType) .def("getSerializationTypeVirtual", &ArrayType::getSerializationTypeVirtual) .def("setSize", &ArrayType::setSize) diff --git a/src/Python/pytnl/tnl/Grid.h b/src/Python/pytnl/tnl/Grid.h index afc5b3974..8cf28a8f5 100644 --- a/src/Python/pytnl/tnl/Grid.h +++ b/src/Python/pytnl/tnl/Grid.h @@ -59,8 +59,6 @@ void export_Grid( py::module & m, const char* name ) auto grid = py::class_( m, name ) .def(py::init<>()) .def_static("getMeshDimension", &Grid::getMeshDimension) - .def_static("getType", &Grid::getType) - .def("getTypeVirtual", &Grid::getTypeVirtual) .def_static("getSerializationType", &Grid::getSerializationType) .def("getSerializationTypeVirtual", &Grid::getSerializationTypeVirtual) // FIXME: number of parameters depends on the grid dimension diff --git a/src/Python/pytnl/tnl/Mesh.h b/src/Python/pytnl/tnl/Mesh.h index ee17a1348..c0207e243 100644 --- a/src/Python/pytnl/tnl/Mesh.h +++ b/src/Python/pytnl/tnl/Mesh.h @@ -112,8 +112,6 @@ void export_Mesh( py::module & m, const char* name ) auto mesh = py::class_< Mesh, TNL::Object >( m, name ) .def(py::init<>()) .def_static("getMeshDimension", &Mesh::getMeshDimension) - .def_static("getType", &Mesh::getType) - .def("getTypeVirtual", &Mesh::getTypeVirtual) .def_static("getSerializationType", &Mesh::getSerializationType) .def("getSerializationTypeVirtual", &Mesh::getSerializationTypeVirtual) .def("getEntitiesCount", &mesh_getEntitiesCount< Mesh >) diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index 6788d1a68..1a32bd257 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ 
b/src/Python/pytnl/tnl/SparseMatrix.h @@ -56,8 +56,6 @@ void export_Matrix( py::module & m, const char* name ) auto matrix = py::class_< Matrix, TNL::Object >( m, name ) .def(py::init<>()) // overloads (defined in Object) - .def_static("getType", &Matrix::getType) - .def("getTypeVirtual", &Matrix::getTypeVirtual) .def_static("getSerializationType", &Matrix::getSerializationType) .def("getSerializationTypeVirtual", &Matrix::getSerializationTypeVirtual) .def("print", &Matrix::print) diff --git a/src/Python/pytnl/tnl/StaticVector.h b/src/Python/pytnl/tnl/StaticVector.h index 6b5570647..ba7cfcaf4 100644 --- a/src/Python/pytnl/tnl/StaticVector.h +++ b/src/Python/pytnl/tnl/StaticVector.h @@ -14,7 +14,6 @@ void export_StaticVector( Scope & scope, const char* name ) auto vector = py::class_(scope, name) .def(py::init< RealType >()) .def(py::init< VectorType >()) - .def_static("getType", &VectorType::getType) .def("getSize", &VectorType::getSize) // operator= .def("assign", []( VectorType& vector, const VectorType& other ) -> VectorType& { diff --git a/src/Python/pytnl/tnl/String.cpp b/src/Python/pytnl/tnl/String.cpp index f9fff7d92..3203abda2 100644 --- a/src/Python/pytnl/tnl/String.cpp +++ b/src/Python/pytnl/tnl/String.cpp @@ -16,7 +16,6 @@ void export_String( py::module & m ) .def(py::init()) .def(py::init([](int v){ return TNL::convertToString(v); })) .def(py::init([](double v){ return TNL::convertToString(v); })) - .def_static("getType", &TNL::String::getType) // __str__ (uses operator<<) // explicit namespace resolution is necessary, see http://stackoverflow.com/a/3084341/4180822 // .def(py::self_ns::str(py::self_ns::self)) diff --git a/src/Python/pytnl/tnl/Vector.h b/src/Python/pytnl/tnl/Vector.h index 9fdac4072..475a53736 100644 --- a/src/Python/pytnl/tnl/Vector.h +++ b/src/Python/pytnl/tnl/Vector.h @@ -14,8 +14,6 @@ void export_Vector(py::module & m, const char* name) py::class_(m, name) .def(py::init<>()) .def(py::init()) - .def_static("getType", 
&VectorType::getType) - .def("getTypeVirtual", &VectorType::getTypeVirtual) .def_static("getSerializationType", &VectorType::getSerializationType) .def("getSerializationTypeVirtual", &VectorType::getSerializationTypeVirtual) .def(py::self == py::self) diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h index 67d54abe2..4855b8f90 100644 --- a/src/TNL/Atomic.h +++ b/src/TNL/Atomic.h @@ -16,7 +16,6 @@ #include #include -#include namespace TNL { @@ -48,14 +47,6 @@ public: return *this; } - // just for compatibility with TNL::Containers::Array... - static String getType() - { - return "Atomic< " + - TNL::getType< T >() + ", " + - Devices::Host::getType() + " >"; - } - // CAS loops for updating maximum and minimum // reference: https://stackoverflow.com/a/16190791 T fetch_max( T value ) noexcept @@ -120,14 +111,6 @@ public: return *this; } - // just for compatibility with TNL::Containers::Array... - static String getType() - { - return "Atomic< " + - TNL::getType< T >() + ", " + - Devices::Cuda::getType() + " >"; - } - bool is_lock_free() const noexcept { return true; diff --git a/src/TNL/Config/ConfigDescription.h b/src/TNL/Config/ConfigDescription.h index 06ed1ba8a..febad283c 100644 --- a/src/TNL/Config/ConfigDescription.h +++ b/src/TNL/Config/ConfigDescription.h @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/src/TNL/Config/ConfigEntry.h b/src/TNL/Config/ConfigEntry.h index 1b56574cc..370366e5e 100644 --- a/src/TNL/Config/ConfigEntry.h +++ b/src/TNL/Config/ConfigEntry.h @@ -12,6 +12,7 @@ #include +#include #include namespace TNL { diff --git a/src/TNL/Config/ConfigEntryList.h b/src/TNL/Config/ConfigEntryList.h index 50284e37c..86f264234 100644 --- a/src/TNL/Config/ConfigEntryList.h +++ b/src/TNL/Config/ConfigEntryList.h @@ -12,6 +12,7 @@ #include +#include #include namespace TNL { diff --git a/src/TNL/Config/ParameterContainer.h b/src/TNL/Config/ParameterContainer.h index b8315d3ec..cceaf2afb 100644 --- 
a/src/TNL/Config/ParameterContainer.h +++ b/src/TNL/Config/ParameterContainer.h @@ -13,7 +13,8 @@ #include #include -#include +#include +#include //#include namespace TNL { diff --git a/src/TNL/Config/parseCommandLine.h b/src/TNL/Config/parseCommandLine.h index 34a555f28..3e2849290 100644 --- a/src/TNL/Config/parseCommandLine.h +++ b/src/TNL/Config/parseCommandLine.h @@ -13,7 +13,6 @@ #include #include -//#include #include #include diff --git a/src/TNL/Containers/Algorithms/ArrayIO.h b/src/TNL/Containers/Algorithms/ArrayIO.h index 922fc0ae8..5ec8b000b 100644 --- a/src/TNL/Containers/Algorithms/ArrayIO.h +++ b/src/TNL/Containers/Algorithms/ArrayIO.h @@ -34,9 +34,9 @@ struct ArrayIO< Value, Device, Index, true > static String getSerializationType() { return String( "Containers::Array< " ) + - TNL::getType< Value >() + ", " + - Devices::Host::getType() + ", " + - TNL::getType< Index >() + " >"; + TNL::getSerializationType< Value >() + ", " + + TNL::getSerializationType< Devices::Host >() + ", " + + TNL::getSerializationType< Index >() + " >"; } static void save( File& file, @@ -80,9 +80,9 @@ struct ArrayIO< Value, Device, Index, false > static String getSerializationType() { return String( "Containers::Array< " ) + - TNL::getType< Value >() + ", " + - Devices::Host::getType() + ", " + - TNL::getType< Index >() + " >"; + TNL::getSerializationType< Value >() + ", " + + TNL::getSerializationType< Devices::Host >() + ", " + + TNL::getSerializationType< Index >() + " >"; } static void save( File& file, diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index d9a5e56d2..a67c8c1e5 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -226,16 +226,6 @@ class Array */ AllocatorType getAllocator() const; - /** - * \brief Returns a \ref String representation of the array type in C++ style. - */ - static String getType(); - - /** - * \brief Returns a \ref String representation of the array type in C++ style. 
- */ - virtual String getTypeVirtual() const; - /** * \brief Returns a \ref String representation of the array type in C++ style, * where device is always \ref Devices::Host. diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 94179c925..23909dd8c 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include @@ -177,31 +177,6 @@ getAllocator() const return allocator; } -template< typename Value, - typename Device, - typename Index, - typename Allocator > -String -Array< Value, Device, Index, Allocator >:: -getType() -{ - return String( "Containers::Array< " ) + - TNL::getType< Value >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + " >"; -} - -template< typename Value, - typename Device, - typename Index, - typename Allocator > -String -Array< Value, Device, Index, Allocator >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename Value, typename Device, typename Index, diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index 066ada8f2..49a4a911d 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -100,11 +100,6 @@ public: */ using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >; - /** - * \brief Returns a \ref String representation of the array view type. - */ - static String getType(); - /** * \brief Constructs an empty array view. 
* diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 05cf7e9e3..b37831004 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -24,19 +24,6 @@ namespace TNL { namespace Containers { -template< typename Value, - typename Device, - typename Index > -String -ArrayView< Value, Device, Index >:: -getType() -{ - return String( "Containers::ArrayView< " ) + ", " + - TNL::getType< Value >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + " >"; -} - // explicit initialization by raw data pointer and size template< typename Value, typename Device, diff --git a/src/TNL/Containers/DistributedArray.h b/src/TNL/Containers/DistributedArray.h index 7f53c724a..f7b0c383a 100644 --- a/src/TNL/Containers/DistributedArray.h +++ b/src/TNL/Containers/DistributedArray.h @@ -83,13 +83,6 @@ public: void copyFromGlobal( ConstLocalViewType globalArray ); - static String getType(); - - virtual String getTypeVirtual() const; - - // TODO: no getSerializationType method until there is support for serialization - - // Usual Array methods follow below. 
/** diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp index af5673a38..c0f7522ab 100644 --- a/src/TNL/Containers/DistributedArray.hpp +++ b/src/TNL/Containers/DistributedArray.hpp @@ -160,33 +160,6 @@ operator ConstViewType() const return getConstView(); } -template< typename Value, - typename Device, - typename Index, - typename Communicator > -String -DistributedArray< Value, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedArray< " ) + - TNL::getType< Value >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - " >"; -} - -template< typename Value, - typename Device, - typename Index, - typename Communicator > -String -DistributedArray< Value, Device, Index, Communicator >:: -getTypeVirtual() const -{ - return getType(); -} - template< typename Value, typename Device, typename Index, diff --git a/src/TNL/Containers/DistributedArrayView.h b/src/TNL/Containers/DistributedArrayView.h index 82a662e39..41557d784 100644 --- a/src/TNL/Containers/DistributedArrayView.h +++ b/src/TNL/Containers/DistributedArrayView.h @@ -108,9 +108,6 @@ public: void copyFromGlobal( ConstLocalViewType globalArray ); - static String getType(); - - /* * Usual ArrayView methods follow below. 
*/ diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp index a861ee3a1..d92ae927d 100644 --- a/src/TNL/Containers/DistributedArrayView.hpp +++ b/src/TNL/Containers/DistributedArrayView.hpp @@ -184,23 +184,6 @@ copyFromGlobal( ConstLocalViewType globalArray ) } -template< typename Value, - typename Device, - typename Index, - typename Communicator > -String -DistributedArrayView< Value, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedArrayView< " ) + - TNL::getType< Value >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - " >"; -} - - template< typename Value, typename Device, typename Index, diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h index 51d7c537c..27ea91033 100644 --- a/src/TNL/Containers/DistributedVector.h +++ b/src/TNL/Containers/DistributedVector.h @@ -69,11 +69,6 @@ public: operator ConstViewType() const; - static String getType(); - - virtual String getTypeVirtual() const; - - /* * Usual Vector methods follow below. 
*/ diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index e6913b4e6..0820dd218 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -83,34 +83,6 @@ operator ConstViewType() const } -template< typename Real, - typename Device, - typename Index, - typename Communicator > -String -DistributedVector< Real, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedVector< " ) + - TNL::getType< Real >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - " >"; -} - -template< typename Real, - typename Device, - typename Index, - typename Communicator > -String -DistributedVector< Real, Device, Index, Communicator >:: -getTypeVirtual() const -{ - return getType(); -} - - /* * Usual Vector methods follow below. */ diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h index 99764432d..cb23669ac 100644 --- a/src/TNL/Containers/DistributedVectorView.h +++ b/src/TNL/Containers/DistributedVectorView.h @@ -72,8 +72,6 @@ public: __cuda_callable__ ConstViewType getConstView() const; - static String getType(); - /* * Usual Vector methods follow below. 
*/ diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 7ecb4e370..9bc0045a5 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -64,22 +64,6 @@ getConstView() const return *this; } -template< typename Real, - typename Device, - typename Index, - typename Communicator > -String -DistributedVectorView< Real, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedVectorView< " ) + - TNL::getType< Real >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - " >"; -} - /* * Usual Vector methods follow below. diff --git a/src/TNL/Containers/List.h b/src/TNL/Containers/List.h index 0cf6f762d..3558a1b2c 100644 --- a/src/TNL/Containers/List.h +++ b/src/TNL/Containers/List.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace TNL { namespace Containers { @@ -59,9 +59,6 @@ template< class T > class List /// Destroys the list. References to the values in the list become invalid. ~List(); - /// Returns the type of list. - static String getType(); - /// Returns \e true if the list contains no items, otherwise returns \e false. 
bool isEmpty() const; diff --git a/src/TNL/Containers/List_impl.h b/src/TNL/Containers/List_impl.h index a8bcb8115..3068de315 100644 --- a/src/TNL/Containers/List_impl.h +++ b/src/TNL/Containers/List_impl.h @@ -35,12 +35,6 @@ List< T >::~List() reset(); } -template< typename T > -String List< T >::getType() -{ - return String( "Containers::List< " ) + TNL::getType< T >() + String( " >" ); -} - template< typename T > bool List< T >::isEmpty() const { diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h index 76e61846a..5e575cc21 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h @@ -52,10 +52,6 @@ class EllpackIndexMultimap template< typename Device_ > EllpackIndexMultimap& operator=( const EllpackIndexMultimap< Index, Device_, LocalIndex, SliceSize >& other ); - static String getType(); - - String getTypeVirtual() const; - void setKeysRange( const IndexType& keysRange ); __cuda_callable__ diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp index 4b94c9cab..6fb1f4b26 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp @@ -43,34 +43,6 @@ operator=( const EllpackIndexMultimap< Index, Device_, LocalIndex, SliceSize >& return *this; } -template< typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -EllpackIndexMultimap< Index, Device, LocalIndex, SliceSize >:: -getType() -{ - return String( "EllpackIndexMultimap< ") + - String( TNL::getType< Index >() ) + - String( ", " ) + - Device::getType() + - String( ", " ) + - String( TNL::getType< LocalIndexType >() ) + - String( " >" ); -} - -template< typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -EllpackIndexMultimap< Index, Device, LocalIndex, SliceSize >:: -getTypeVirtual() 
const -{ - return this->getType(); -} - template< typename Index, typename Device, typename LocalIndex, diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h index ba9994da9..f816cabd2 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h @@ -52,10 +52,6 @@ class StaticEllpackIndexMultimap template< typename Device_ > StaticEllpackIndexMultimap& operator=( const StaticEllpackIndexMultimap< ValuesCount, Index, Device_, LocalIndex, SliceSize >& other ); - static String getType(); - - String getTypeVirtual() const; - void setKeysRange( const IndexType& keysRange ); __cuda_callable__ diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp index d1261ff8b..3e03f9e03 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp @@ -43,36 +43,6 @@ operator=( const StaticEllpackIndexMultimap< ValuesCount, Index, Device_, LocalI return *this; } -template< int ValuesCount, - typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -StaticEllpackIndexMultimap< ValuesCount, Index, Device, LocalIndex, SliceSize >:: -getType() -{ - return String( "StaticEllpackIndexMultimap< ") + - String( TNL::getType< Index >() ) + - String( ", " ) + - Device::getType() + - String( ", " ) + - String( TNL::getType< LocalIndexType >() ) + - String( " >" ); -} - -template< int ValuesCount, - typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -StaticEllpackIndexMultimap< ValuesCount, Index, Device, LocalIndex, SliceSize >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< int ValuesCount, typename Index, typename Device, diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index 
2421305a7..fa543b591 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -105,10 +105,6 @@ public: __cuda_callable__ StaticArray( const Value& v1, const Value& v2, const Value& v3 ); - /** - * \brief Gets type of this array. - */ - static String getType(); /** * \brief Gets pointer to data of this static array. diff --git a/src/TNL/Containers/StaticArray.hpp b/src/TNL/Containers/StaticArray.hpp index 89a66ecc9..9567590c0 100644 --- a/src/TNL/Containers/StaticArray.hpp +++ b/src/TNL/Containers/StaticArray.hpp @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include @@ -146,16 +146,6 @@ StaticArray< Size, Value >::StaticArray( const Value& v1, const Value& v2, const data[ 2 ] = v3; } -template< int Size, typename Value > -String StaticArray< Size, Value >::getType() -{ - return String( "Containers::StaticArray< " ) + - convertToString( Size ) + - String( ", " ) + - TNL::getType< Value >() + - String( " >" ); -} - template< int Size, typename Value > __cuda_callable__ Value* StaticArray< Size, Value >::getData() diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index a15420d07..640fa45de 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -94,11 +94,6 @@ public: bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); - /** - * \brief Gets type of this vector. - */ - static String getType(); - /** * \brief Assignment operator with a vector expression. 
* diff --git a/src/TNL/Containers/StaticVector.hpp b/src/TNL/Containers/StaticVector.hpp index 8442db661..99088626d 100644 --- a/src/TNL/Containers/StaticVector.hpp +++ b/src/TNL/Containers/StaticVector.hpp @@ -50,16 +50,6 @@ StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters, return true; } -template< int Size, typename Real > -String StaticVector< Size, Real >::getType() -{ - return String( "Containers::StaticVector< " ) + - convertToString( Size ) + - String( ", " ) + - TNL::getType< Real >() + - String( " >" ); -} - template< int Size, typename Real > template< typename VectorExpression > StaticVector< Size, Real >& diff --git a/src/TNL/Containers/Subrange.h b/src/TNL/Containers/Subrange.h index 089118553..17e02c45f 100644 --- a/src/TNL/Containers/Subrange.h +++ b/src/TNL/Containers/Subrange.h @@ -16,7 +16,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { @@ -54,11 +54,6 @@ public: end = 0; } - static String getType() - { - return "Subrange< " + TNL::getType< Index >() + " >"; - } - // Checks if a global index is in the set of local indices. __cuda_callable__ bool isLocal( Index i ) const @@ -127,7 +122,7 @@ protected: template< typename Index > std::ostream& operator<<( std::ostream& str, const Subrange< Index >& range ) { - return str << Subrange< Index >::getType() << "( " << range.getBegin() << ", " << range.getEnd() << " )"; + return str << getType< Subrange< Index > >() << "( " << range.getBegin() << ", " << range.getEnd() << " )"; } } // namespace Containers diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 42f846597..ba67df733 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -121,16 +121,6 @@ public: */ Vector& operator=( Vector&& ) = default; - /** - * \brief Returns a \ref String representation of the vector type in C++ style. 
- */ - static String getType(); - - /** - * \brief Returns a \ref String representation of the vector type in C++ style. - */ - virtual String getTypeVirtual() const; - /** * \brief Returns a modifiable view of the vector. * diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 12a699cb3..e01bdf75e 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -27,31 +27,6 @@ Vector( const Vector& vector, { } -template< typename Real, - typename Device, - typename Index, - typename Allocator > -String -Vector< Real, Device, Index, Allocator >:: -getType() -{ - return String( "Containers::Vector< " ) + - TNL::getType< Real >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + " >"; -} - -template< typename Real, - typename Device, - typename Index, - typename Allocator > -String -Vector< Real, Device, Index, Allocator >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index ba43e74d8..f111a14be 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -97,11 +97,6 @@ public: VectorView( const ArrayView< Real_, Device, Index >& view ) : BaseType( view ) {} - /** - * \brief Returns a \ref String representation of the vector view type. - */ - static String getType(); - /** * \brief Returns a modifiable view of the vector view. 
* diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 9985594a6..40d870f62 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -17,19 +17,6 @@ namespace TNL { namespace Containers { -template< typename Real, - typename Device, - typename Index > -String -VectorView< Real, Device, Index >:: -getType() -{ - return String( "Containers::VectorView< " ) + - TNL::getType< Real >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 9ed546c45..122c466c9 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -26,8 +26,6 @@ namespace Devices { class Cuda { public: - static inline String getType(); - static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); static inline bool setup( const Config::ParameterContainer& parameters, diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index 6703ec0b6..b758584bd 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -21,11 +21,6 @@ namespace TNL { namespace Devices { -inline String Cuda::getType() -{ - return String( "Devices::Cuda" ); -} - inline void Cuda::configSetup( Config::ConfigDescription& config, const String& prefix ) diff --git a/src/TNL/Devices/Host.h b/src/TNL/Devices/Host.h index b48ee98bc..115607583 100644 --- a/src/TNL/Devices/Host.h +++ b/src/TNL/Devices/Host.h @@ -25,11 +25,6 @@ namespace Devices { class Host { public: - static String getType() - { - return String( "Devices::Host" ); - } - static void disableOMP() { ompEnabled() = false; diff --git a/src/TNL/Experimental/Arithmetics/Quad.h b/src/TNL/Experimental/Arithmetics/Quad.h index 13d9c8231..3c1dd073f 100644 --- a/src/TNL/Experimental/Arithmetics/Quad.h +++ b/src/TNL/Experimental/Arithmetics/Quad.h @@ -33,8 +33,6 @@ public: explicit Quad(const 
T&); explicit Quad(int); Quad(const Quad&); - - static String getType(); /*OVERLOADED OPERATORS*/ T& operator[](int); diff --git a/src/TNL/Experimental/Arithmetics/Quad_impl.h b/src/TNL/Experimental/Arithmetics/Quad_impl.h index 63c08a401..3a2ecb245 100644 --- a/src/TNL/Experimental/Arithmetics/Quad_impl.h +++ b/src/TNL/Experimental/Arithmetics/Quad_impl.h @@ -56,14 +56,6 @@ Quad::Quad(const Quad& other) { data[3] = other[3]; } -template -String -Quad< T >:: -getType() -{ - return String( "Quad< " + getType< T >() + " >" ); -} - template T& Quad::operator [](int idx) { return data[idx]; diff --git a/src/TNL/Functions/Analytic/Blob.h b/src/TNL/Functions/Analytic/Blob.h index e12a27393..5a95257cd 100644 --- a/src/TNL/Functions/Analytic/Blob.h +++ b/src/TNL/Functions/Analytic/Blob.h @@ -50,8 +50,6 @@ class Blob< 1, Real > : public BlobBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Blob(); template< int XDiffOrder = 0, @@ -75,8 +73,6 @@ class Blob< 2, Real > : public BlobBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Blob(); template< int XDiffOrder = 0, @@ -101,8 +97,6 @@ class Blob< 3, Real > : public BlobBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Blob(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Blob_impl.h b/src/TNL/Functions/Analytic/Blob_impl.h index f615a10dd..f5195f758 100644 --- a/src/TNL/Functions/Analytic/Blob_impl.h +++ b/src/TNL/Functions/Analytic/Blob_impl.h @@ -32,13 +32,6 @@ setup( const Config::ParameterContainer& parameters, * 1D */ -template< typename Real > -String -Blob< 1, Real >::getType() -{ - return "Functions::Analytic::Blob< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > Blob< 1, Real >::Blob() { @@ -75,13 +68,6 @@ operator()( const 
PointType& v, /**** * 2D */ -template< typename Real > -String -Blob< 2, Real >::getType() -{ - return String( "Functions::Analytic::Blob< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Blob< 2, Real >::Blob() { @@ -119,13 +105,6 @@ operator()( const PointType& v, /**** * 3D */ -template< typename Real > -String -Blob< 3, Real >::getType() -{ - return String( "Functions::Analytic::Blob< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Blob< 3, Real >::Blob() { diff --git a/src/TNL/Functions/Analytic/Cylinder.h b/src/TNL/Functions/Analytic/Cylinder.h index fb3f0542c..8b8ab1983 100644 --- a/src/TNL/Functions/Analytic/Cylinder.h +++ b/src/TNL/Functions/Analytic/Cylinder.h @@ -54,8 +54,6 @@ class Cylinder< 1, Real > : public CylinderBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Cylinder(); template< int XDiffOrder = 0, @@ -81,8 +79,6 @@ class Cylinder< 2, Real > : public CylinderBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Cylinder(); template< int XDiffOrder = 0, @@ -108,8 +104,6 @@ class Cylinder< 3, Real > : public CylinderBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Cylinder(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Cylinder_impl.h b/src/TNL/Functions/Analytic/Cylinder_impl.h index b0698bca3..b76286580 100644 --- a/src/TNL/Functions/Analytic/Cylinder_impl.h +++ b/src/TNL/Functions/Analytic/Cylinder_impl.h @@ -47,13 +47,6 @@ const Real& CylinderBase< Real, Dimension >::getDiameter() const * 1D */ -template< typename Real > -String -Cylinder< 1, Real >::getType() -{ - return "Functions::Analytic::Cylinder< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > Cylinder< 1, Real >::Cylinder() { @@ -91,13 +84,6 @@ 
operator()( const PointType& v, * 2D */ -template< typename Real > -String -Cylinder< 2, Real >::getType() -{ - return String( "Functions::Analytic::Cylinder< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Cylinder< 2, Real >::Cylinder() { @@ -137,14 +123,6 @@ operator()( const PointType& v, /**** * 3D */ - -template< typename Real > -String -Cylinder< 3, Real >::getType() -{ - return String( "Functions::Analytic::Cylinder< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Cylinder< 3, Real >::Cylinder() { diff --git a/src/TNL/Functions/Analytic/ExpBump.h b/src/TNL/Functions/Analytic/ExpBump.h index 36b07c9a2..48fc613d9 100644 --- a/src/TNL/Functions/Analytic/ExpBump.h +++ b/src/TNL/Functions/Analytic/ExpBump.h @@ -58,8 +58,6 @@ class ExpBump< 1, Real > : public ExpBumpBase< 1, Real > typedef Real RealType; typedef Containers::StaticVector< 1, RealType > PointType; - static String getType(); - ExpBump(); template< int XDiffOrder = 0, @@ -82,8 +80,6 @@ class ExpBump< 2, Real > : public ExpBumpBase< 2, Real > typedef Real RealType; typedef Containers::StaticVector< 2, RealType > PointType; - static String getType(); - ExpBump(); template< int XDiffOrder = 0, @@ -106,9 +102,6 @@ class ExpBump< 3, Real > : public ExpBumpBase< 3, Real > typedef Real RealType; typedef Containers::StaticVector< 3, RealType > PointType; - - static String getType(); - ExpBump(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/ExpBump_impl.h b/src/TNL/Functions/Analytic/ExpBump_impl.h index 54ecbe2a6..6c1103f02 100644 --- a/src/TNL/Functions/Analytic/ExpBump_impl.h +++ b/src/TNL/Functions/Analytic/ExpBump_impl.h @@ -63,13 +63,6 @@ const Real& ExpBumpBase< dimensions, Real >::getSigma() const * 1D */ -template< typename Real > -String -ExpBump< 1, Real >::getType() -{ - return "Functions::Analytic::ExpBump< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > ExpBump< 1, Real >::ExpBump() { @@ -113,13 
+106,6 @@ operator()( const PointType& v, * 2D */ -template< typename Real > -String -ExpBump< 2, Real >::getType() -{ - return String( "Functions::Analytic::ExpBump< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > ExpBump< 2, Real >::ExpBump() { @@ -168,13 +154,6 @@ operator()( const PointType& v, * 3D */ -template< typename Real > -String -ExpBump< 3, Real >::getType() -{ - return String( "Functions::Analytic::ExpBump< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > ExpBump< 3, Real >::ExpBump() { diff --git a/src/TNL/Functions/Analytic/Flowerpot.h b/src/TNL/Functions/Analytic/Flowerpot.h index 5a42c5f94..f33d32b18 100644 --- a/src/TNL/Functions/Analytic/Flowerpot.h +++ b/src/TNL/Functions/Analytic/Flowerpot.h @@ -54,8 +54,6 @@ class Flowerpot< 1, Real > : public FlowerpotBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Flowerpot(); template< int XDiffOrder = 0, @@ -81,8 +79,6 @@ class Flowerpot< 2, Real > : public FlowerpotBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Flowerpot(); template< int XDiffOrder = 0, @@ -108,8 +104,6 @@ class Flowerpot< 3, Real > : public FlowerpotBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Flowerpot(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Flowerpot_impl.h b/src/TNL/Functions/Analytic/Flowerpot_impl.h index 455b4682b..6769a794b 100644 --- a/src/TNL/Functions/Analytic/Flowerpot_impl.h +++ b/src/TNL/Functions/Analytic/Flowerpot_impl.h @@ -45,13 +45,6 @@ const Real& FlowerpotBase< Real, Dimension >::getDiameter() const * 1D */ -template< typename Real > -String -Flowerpot< 1, Real >::getType() -{ - return "Functions::Analytic::Flowerpot< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename 
Real > Flowerpot< 1, Real >::Flowerpot() { @@ -89,13 +82,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -Flowerpot< 2, Real >::getType() -{ - return String( "Functions::Analytic::Flowerpot< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Flowerpot< 2, Real >::Flowerpot() { @@ -136,13 +122,6 @@ operator()( const PointType& v, * 3D */ -template< typename Real > -String -Flowerpot< 3, Real >::getType() -{ - return String( "Functions::Analytic::Flowerpot< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Flowerpot< 3, Real >::Flowerpot() { diff --git a/src/TNL/Functions/Analytic/PseudoSquare.h b/src/TNL/Functions/Analytic/PseudoSquare.h index ea4a5ae84..1139f6ed8 100644 --- a/src/TNL/Functions/Analytic/PseudoSquare.h +++ b/src/TNL/Functions/Analytic/PseudoSquare.h @@ -50,8 +50,6 @@ class PseudoSquare< 1, Real > : public PseudoSquareBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - PseudoSquare(); template< int XDiffOrder = 0, @@ -75,8 +73,6 @@ class PseudoSquare< 2, Real > : public PseudoSquareBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - PseudoSquare(); template< int XDiffOrder = 0, @@ -100,8 +96,6 @@ class PseudoSquare< 3, Real > : public PseudoSquareBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - PseudoSquare(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/PseudoSquare_impl.h b/src/TNL/Functions/Analytic/PseudoSquare_impl.h index 5da33707a..18edb0d34 100644 --- a/src/TNL/Functions/Analytic/PseudoSquare_impl.h +++ b/src/TNL/Functions/Analytic/PseudoSquare_impl.h @@ -33,13 +33,6 @@ setup( const Config::ParameterContainer& parameters, * 1D */ -template< typename Real > -String -PseudoSquare< 1, Real >::getType() -{ - 
return "Functions::Analytic::PseudoSquare< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > PseudoSquare< 1, Real >::PseudoSquare() { @@ -76,13 +69,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -PseudoSquare< 2, Real >::getType() -{ - return String( "Functions::Analytic::PseudoSquare< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > PseudoSquare< 2, Real >::PseudoSquare() { @@ -120,13 +106,6 @@ operator()( const PointType& v, /**** * 3D */ -template< typename Real > -String -PseudoSquare< 3, Real >::getType() -{ - return String( "Functions::Analytic::PseudoSquare< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > PseudoSquare< 3, Real >::PseudoSquare() { diff --git a/src/TNL/Functions/Analytic/Twins.h b/src/TNL/Functions/Analytic/Twins.h index c882ec4eb..775caf391 100644 --- a/src/TNL/Functions/Analytic/Twins.h +++ b/src/TNL/Functions/Analytic/Twins.h @@ -46,8 +46,6 @@ class Twins< 1, Real > : public TwinsBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Twins(); template< int XDiffOrder = 0, @@ -73,8 +71,6 @@ class Twins< 2, Real > : public TwinsBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Twins(); template< int XDiffOrder = 0, @@ -100,8 +96,6 @@ class Twins< 3, Real > : public TwinsBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Twins(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Twins_impl.h b/src/TNL/Functions/Analytic/Twins_impl.h index 9e1cd81c1..7b2ce41c7 100644 --- a/src/TNL/Functions/Analytic/Twins_impl.h +++ b/src/TNL/Functions/Analytic/Twins_impl.h @@ -31,13 +31,6 @@ setup( const Config::ParameterContainer& parameters, * 1D */ -template< typename Real > -String -Twins< 1, 
Real >::getType() -{ - return "Functions::Analytic::Twins< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > Twins< 1, Real >::Twins() { @@ -75,13 +68,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -Twins< 2, Real >::getType() -{ - return String( "Functions::Analytic::Twins< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Twins< 2, Real >::Twins() { @@ -121,13 +107,6 @@ operator()( const PointType& v, /**** * 3D */ -template< typename Real > -String -Twins< 3, Real >::getType() -{ - return String( "Functions::Analytic::Twins< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Twins< 3, Real >::Twins() { diff --git a/src/TNL/Functions/Analytic/VectorNorm.h b/src/TNL/Functions/Analytic/VectorNorm.h index a9d292c5f..583f3eebc 100644 --- a/src/TNL/Functions/Analytic/VectorNorm.h +++ b/src/TNL/Functions/Analytic/VectorNorm.h @@ -144,8 +144,6 @@ class VectorNorm< 1, Real > : public VectorNormBase< 1, Real > using typename BaseType::RealType; using typename BaseType::PointType; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0 > @@ -184,8 +182,6 @@ class VectorNorm< 2, Real > : public VectorNormBase< 2, Real > using typename BaseType::RealType; using typename BaseType::PointType; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0 > @@ -232,8 +228,6 @@ class VectorNorm< 3, Real > : public VectorNormBase< 3, Real > using typename BaseType::RealType; using typename BaseType::PointType; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0 > diff --git a/src/TNL/Functions/MeshFunction.h b/src/TNL/Functions/MeshFunction.h index f7d6749c9..2b7069c0f 100644 --- a/src/TNL/Functions/MeshFunction.h +++ b/src/TNL/Functions/MeshFunction.h @@ -60,10 +60,6 @@ class MeshFunction : Pointers::SharedPointer< Vector >& data, const IndexType& 
offset = 0 ); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Functions/MeshFunction_impl.h b/src/TNL/Functions/MeshFunction_impl.h index 908a31a09..0131cbb25 100644 --- a/src/TNL/Functions/MeshFunction_impl.h +++ b/src/TNL/Functions/MeshFunction_impl.h @@ -92,30 +92,6 @@ MeshFunction( const MeshPointer& meshPointer, this->data.bind( *data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() ); } -template< typename Mesh, - int MeshEntityDimension, - typename Real > -String -MeshFunction< Mesh, MeshEntityDimension, Real >:: -getType() -{ - return String( "Functions::MeshFunction< " ) + - Mesh::getType() + ", " + - convertToString( MeshEntityDimension ) + ", " + - TNL::getType< Real >() + - " >"; -}; - -template< typename Mesh, - int MeshEntityDimension, - typename Real > -String -MeshFunction< Mesh, MeshEntityDimension, Real >:: -getTypeVirtual() const -{ - return this->getType(); -}; - template< typename Mesh, int MeshEntityDimension, typename Real > @@ -124,10 +100,10 @@ MeshFunction< Mesh, MeshEntityDimension, Real >:: getSerializationType() { return String( "Functions::MeshFunction< " ) + - Mesh::getSerializationType() + ", " + - convertToString( MeshEntityDimension ) + ", " + - TNL::getType< Real >() + - " >"; + TNL::getSerializationType< Mesh >() + ", " + + convertToString( MeshEntityDimension ) + ", " + + getType< Real >() + + " >"; }; template< typename Mesh, diff --git a/src/TNL/Functions/VectorField.h b/src/TNL/Functions/VectorField.h index 4db601c9f..4f06cd368 100644 --- a/src/TNL/Functions/VectorField.h +++ b/src/TNL/Functions/VectorField.h @@ -113,19 +113,6 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, Real > > this->vectorField[ i ]->setMesh( meshPointer ); }; - static String getType() - { - return String( "Functions::VectorField< " ) + - 
convertToString( Size) + ", " + - FunctionType::getType() + - " >"; - } - - String getTypeVirtual() const - { - return this->getType(); - } - static String getSerializationType() { return String( "Functions::VectorField< " ) + diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h index ebda5972a..151ad5e7b 100644 --- a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h +++ b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h @@ -23,7 +23,7 @@ write( const VectorField& vectorField, std::ostream& str, const double& scale ) { - std::cerr << "Gnuplot writer for mesh vectorFields defined on mesh type " << VectorField::MeshType::getType() << " is not (yet) implemented." << std::endl; + std::cerr << "Gnuplot writer for mesh vectorFields defined on mesh type " << getType< typename VectorField::MeshType >() << " is not (yet) implemented." << std::endl; return false; } diff --git a/src/TNL/Images/DicomSeries.h b/src/TNL/Images/DicomSeries.h index 36e626ab6..50355bc34 100644 --- a/src/TNL/Images/DicomSeries.h +++ b/src/TNL/Images/DicomSeries.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,10 +33,7 @@ #include namespace TNL { - -template<> inline String getType< Images::DicomHeader * > () { return String( "DicomHeader *" ); } - -namespace Images { +namespace Images { struct WindowCenterWidth { diff --git a/src/TNL/Logger.h b/src/TNL/Logger.h index d1f6c5c67..efcbbb3b5 100644 --- a/src/TNL/Logger.h +++ b/src/TNL/Logger.h @@ -12,6 +12,7 @@ #include +#include #include namespace TNL { diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h index dd6618d5e..200b54293 100644 --- a/src/TNL/Matrices/AdEllpack.h +++ b/src/TNL/Matrices/AdEllpack.h @@ -89,10 +89,6 @@ public: AdEllpack(); - static String getType(); - - String getTypeVirtual() const; - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getWarp( const 
IndexType row ) const; diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h index d69f6c4da..e754eca68 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/AdEllpack_impl.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include #pragma once @@ -157,26 +157,6 @@ AdEllpack< Real, Device, Index >::AdEllpack() warpSize( 32 ) {} -template< typename Real, - typename Device, - typename Index > -String AdEllpack< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String AdEllpack< Real, Device, Index >::getType() -{ - return String( "AdEllpack< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/BiEllpack.h index 08bb53666..f3dcc366e 100644 --- a/src/TNL/Matrices/BiEllpack.h +++ b/src/TNL/Matrices/BiEllpack.h @@ -44,10 +44,6 @@ public: BiEllpack(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/BiEllpackSymmetric.h index 0d2ae9f1e..22f39cbb2 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric.h +++ b/src/TNL/Matrices/BiEllpackSymmetric.h @@ -35,10 +35,6 @@ public: BiEllpackSymmetric(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/BiEllpackSymmetric_impl.h index b30cd4d4e..47b342828 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h @@ -45,28 +45,6 @@ BiEllpackSymmetric< Real, Device, Index, StripSize 
>::BiEllpackSymmetric() logWarpSize( 5 ) {} -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpackSymmetric< Real, Device, Index, StripSize >::getType() -{ - return String( "BiEllpackMatrix< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpackSymmetric< Real, Device, Index, StripSize >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 441f040cf..ac0285361 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -47,28 +47,6 @@ BiEllpack< Real, Device, Index, StripSize >::BiEllpack() logWarpSize( 5 ) {} -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpack< Real, Device, Index, StripSize >::getType() -{ - return String( "BiEllpack< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpack< Real, Device, Index, StripSize >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/COOMatrix.h b/src/TNL/Matrices/COOMatrix.h index e5a4a0fd9..799fd60c7 100644 --- a/src/TNL/Matrices/COOMatrix.h +++ b/src/TNL/Matrices/COOMatrix.h @@ -40,10 +40,6 @@ public: COOMatrix(); - static String getType(); - - String getTypeVirtual() const; - bool setDimensions(const IndexType rows, const IndexType columns); diff --git a/src/TNL/Matrices/COOMatrix_impl.h b/src/TNL/Matrices/COOMatrix_impl.h index 1647d684d..bbdd36002 100644 --- a/src/TNL/Matrices/COOMatrix_impl.h +++ b/src/TNL/Matrices/COOMatrix_impl.h @@ -12,7 +12,7 @@ #include 
#include -#include +#include namespace TNL { namespace Matrices { @@ -27,26 +27,6 @@ COOMatrix< Real, Device, Index >::COOMatrix() { }; -template< typename Real, - typename Device, - typename Index > -String COOMatrix< Real, Device, Index >::getType() -{ - return String("COOMatrix< ") + - String(TNL::getType< Real>()) + - String(", ") + - Device::getType() + - String(" >"); -} - -template< typename Real, - typename Device, - typename Index > -String COOMatrix< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h index 64e202a67..15632f130 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/CSR.h @@ -59,10 +59,6 @@ public: CSR(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 9cf962286..1e28157f9 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -41,31 +41,15 @@ CSR< Real, Device, Index >::CSR() template< typename Real, typename Device, typename Index > -String CSR< Real, Device, Index >::getType() +String CSR< Real, Device, Index >::getSerializationType() { return String( "Matrices::CSR< ") + - String( TNL::getType< Real>() ) + + TNL::getType< Real>() + String( ", " ) + - Device::getType() + + getType< Devices::Host >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String CSR< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String CSR< Real, Device, Index >::getSerializationType() -{ - return HostType::getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h index 
a6f06e797..94ab61a14 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/ChunkedEllpack.h @@ -44,9 +44,6 @@ struct tnlChunkedEllpackSliceInfo IndexType chunkSize; IndexType firstRow; IndexType pointer; - - static inline String getType() - { return String( "tnlChunkedEllpackSliceInfo" ); }; }; #ifdef HAVE_CUDA @@ -86,10 +83,6 @@ public: ChunkedEllpack(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 5ba898f0c..5bc0cfd27 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -39,31 +39,15 @@ ChunkedEllpack< Real, Device, Index >::ChunkedEllpack() template< typename Real, typename Device, typename Index > -String ChunkedEllpack< Real, Device, Index >::getType() +String ChunkedEllpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::ChunkedEllpack< ") + - String( TNL::getType< Real >() ) + + getType< Real >() + String( ", " ) + - Device::getType() + + getType< Device >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String ChunkedEllpack< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String ChunkedEllpack< Real, Device, Index >::getSerializationType() -{ - return getType(); -} - template< typename Real, typename Device, typename Index > @@ -314,7 +298,7 @@ template< typename Real, Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getType() ); + return matrixRow.getNonZeroElementsCount( getType< Device >() ); // IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); @@ -1349,7 
+1333,7 @@ void ChunkedEllpack< Real, Device, Index >::printStructure( std::ostream& str, const String& name ) const { const IndexType numberOfSlices = this->getNumberOfSlices(); - str << "Matrix type: " << getType() << std::endl + str << "Matrix type: " << getType( *this ) << std::endl << "Marix name: " << name << std::endl << "Rows: " << this->getRows() << std::endl << "Columns: " << this->getColumns() << std::endl diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 8c21e33b0..a827f632e 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -49,10 +49,6 @@ public: Dense(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h index 357bc8bfc..f57671197 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense_impl.h @@ -24,31 +24,15 @@ Dense< Real, Device, Index >::Dense() { } -template< typename Real, - typename Device, - typename Index > -String Dense< Real, Device, Index >::getType() -{ - return String( "Matrices::Dense< " ) + - String( TNL::getType< RealType >() ) + ", " + - String( Device::getType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Dense< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > String Dense< Real, Device, Index >::getSerializationType() { - return getType(); + return String( "Matrices::Dense< " ) + + getType< RealType >() + ", " + + getType< Device >() + ", " + + getType< IndexType >() + " >"; } template< typename Real, diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h index 72586dbb3..839273efd 100644 --- a/src/TNL/Matrices/DistributedMatrix.h +++ b/src/TNL/Matrices/DistributedMatrix.h @@ -80,13 
+80,6 @@ public: const Matrix& getLocalMatrix() const; - static String getType(); - - virtual String getTypeVirtual() const; - - // TODO: no getSerializationType method until there is support for serialization - - /* * Some common Matrix methods follow below. */ diff --git a/src/TNL/Matrices/DistributedMatrix_impl.h b/src/TNL/Matrices/DistributedMatrix_impl.h index 33eeef264..c1a13a713 100644 --- a/src/TNL/Matrices/DistributedMatrix_impl.h +++ b/src/TNL/Matrices/DistributedMatrix_impl.h @@ -71,28 +71,6 @@ getLocalMatrix() const } -template< typename Matrix, - typename Communicator > -String -DistributedMatrix< Matrix, Communicator >:: -getType() -{ - return String( "Matrices::DistributedMatrix< " ) + - Matrix::getType() + ", " + - // TODO: communicators don't have a getType method - "" + " >"; -} - -template< typename Matrix, - typename Communicator > -String -DistributedMatrix< Matrix, Communicator >:: -getTypeVirtual() const -{ - return getType(); -} - - /* * Some common Matrix methods follow below. 
*/ diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h index e2479fd54..ee3941034 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Ellpack.h @@ -47,10 +47,6 @@ public: Ellpack(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/EllpackSymmetric.h index 323772551..495fcdd07 100644 --- a/src/TNL/Matrices/EllpackSymmetric.h +++ b/src/TNL/Matrices/EllpackSymmetric.h @@ -37,10 +37,6 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > EllpackSymmetric(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/EllpackSymmetricGraph.h index 4c56a8716..c232ad085 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph.h @@ -37,10 +37,6 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > EllpackSymmetricGraph(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index eff31f4b7..b817372dc 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -42,26 +42,6 @@ Index EllpackSymmetricGraph< Real, Device, Index >::getAlignedRows() const return this->alignedRows; } -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetricGraph< Real, Device, Index > :: getType() -{ - return String( "EllpackSymmetricGraph< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index > -String 
EllpackSymmetricGraph< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index da65f22f1..5890212a4 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -26,26 +26,6 @@ EllpackSymmetric< Real, Device, Index > :: EllpackSymmetric() { }; -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetric< Real, Device, Index > :: getType() -{ - return String( "EllpackSymmetric< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetric< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index e42bf5e42..2b8675c04 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -29,33 +29,17 @@ Ellpack< Real, Device, Index > :: Ellpack() template< typename Real, typename Device, typename Index > -String Ellpack< Real, Device, Index > :: getType() +String Ellpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::Ellpack< ") + - String( TNL::getType< Real >() ) + + getType< Real >() + String( ", " ) + - Device::getType() + + getType< Device >() + String( ", " ) + - String( TNL::getType< Index >() ) + + getType< Index >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String Ellpack< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Ellpack< Real, Device, Index >::getSerializationType() -{ - return getType(); -} - template< typename Real, 
typename Device, typename Index > @@ -130,7 +114,7 @@ template< typename Real, Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getType() ); + return matrixRow.getNonZeroElementsCount( getType< Device >() ); } template< typename Real, diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 0496a25a3..2da69f790 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -46,10 +46,6 @@ public: Multidiagonal(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h index b9641babd..ff1ac384a 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Multidiagonal_impl.h @@ -31,31 +31,15 @@ Multidiagonal< Real, Device, Index > :: Multidiagonal() template< typename Real, typename Device, typename Index > -String Multidiagonal< Real, Device, Index > :: getType() +String Multidiagonal< Real, Device, Index >::getSerializationType() { return String( "Matrices::Multidiagonal< ") + - String( TNL::getType< Real >() ) + + getType< Real >() + String( ", " ) + - Device::getType() + + getType< Device >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String Multidiagonal< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Multidiagonal< Real, Device, Index >::getSerializationType() -{ - return getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h index 8503f6180..548916e25 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ 
b/src/TNL/Matrices/SlicedEllpack.h @@ -77,10 +77,6 @@ public: SlicedEllpack(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/SlicedEllpackSymmetric.h index 9e7694de4..272ae510e 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric.h @@ -55,10 +55,6 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index > SlicedEllpackSymmetric(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h index 12019b79d..36d02f9bd 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h @@ -55,10 +55,6 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > SlicedEllpackSymmetricGraph(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h index e126f2c75..f9ef284da 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h @@ -25,28 +25,6 @@ template< typename Real, SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::SlicedEllpackSymmetricGraph() : rearranged( false ) { -}; - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getType() -{ - return String( "SlicedEllpackSymmetricGraph< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - -template< typename Real, - 
typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getTypeVirtual() const -{ - return this->getType(); } template< typename Real, diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 5f0af6d30..edc645688 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -24,28 +24,6 @@ template< typename Real, int SliceSize > SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::SlicedEllpackSymmetric() { -}; - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getType() -{ - return String( "SlicedEllpackSymmetric< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getTypeVirtual() const -{ - return this->getType(); } template< typename Real, diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 0517e913f..c6caa5639 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -24,39 +24,21 @@ template< typename Real, int SliceSize > SlicedEllpack< Real, Device, Index, SliceSize >::SlicedEllpack() { -}; +} template< typename Real, typename Device, typename Index, int SliceSize > -String SlicedEllpack< Real, Device, Index, SliceSize >::getType() +String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType() { return String( "Matrices::SlicedEllpack< ") + - String( TNL::getType< Real >() ) + + TNL::getType< Real >() + String( ", " ) + - Device::getType() + + getType< Device >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String 
SlicedEllpack< Real, Device, Index, SliceSize >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType() -{ - return getType(); -} - template< typename Real, typename Device, typename Index, @@ -129,7 +111,7 @@ template< typename Real, Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getType() ); + return matrixRow.getNonZeroElementsCount( getType< Device >() ); } template< typename Real, diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 7f58bd9c4..4e01c90bb 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -47,10 +47,6 @@ public: Tridiagonal(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h index 2a77a8a59..78e798c2b 100644 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ b/src/TNL/Matrices/Tridiagonal_impl.h @@ -27,31 +27,15 @@ Tridiagonal< Real, Device, Index >::Tridiagonal() { } -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getType() -{ - return String( "Matrices::Tridiagonal< " ) + - String( TNL::getType< RealType >() ) + ", " + - String( Device::getType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > String Tridiagonal< Real, Device, Index >::getSerializationType() { - return getType(); + return 
String( "Matrices::Tridiagonal< " ) + + getType< RealType >() + ", " + + getType< Device >() + ", " + + getType< IndexType >() + " >"; } template< typename Real, diff --git a/src/TNL/Meshes/DefaultConfig.h b/src/TNL/Meshes/DefaultConfig.h index 5e8a7cbef..366356474 100644 --- a/src/TNL/Meshes/DefaultConfig.h +++ b/src/TNL/Meshes/DefaultConfig.h @@ -17,7 +17,7 @@ #pragma once #include -#include +#include #include namespace TNL { @@ -46,17 +46,6 @@ struct DefaultConfig static constexpr int worldDimension = WorldDimension; static constexpr int meshDimension = Cell::dimension; - static String getType() - { - return String( "Meshes::DefaultConfig< " ) + - Cell::getType() + ", " + - convertToString( WorldDimension ) + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< GlobalIndex >() + ", " + - TNL::getType< LocalIndex >() + ", " + - TNL::getType< Id >() + " >"; - }; - /**** * Storage of mesh entities. */ diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h index 53b748c4e..3e36fd025 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D.h +++ b/src/TNL/Meshes/GridDetails/Grid1D.h @@ -65,16 +65,6 @@ class Grid< 1, Real, Device, Index > : public Object // empty destructor is needed only to avoid crappy nvcc warnings ~Grid() {} - /** - * \brief Returns type of grid Real (value), Device type and the type of Index. - */ - static String getType(); - - /** - * \brief Returns type of grid Real (value), Device type and the type of Index. - */ - String getTypeVirtual() const; - /** * \brief Returns (host) type of grid Real (value), Device type and the type of Index. 
*/ diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h index 55055c818..279ec9810 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h @@ -44,32 +44,16 @@ Grid< 1, Real, Device, Index >::Grid( const Index xSize ) this->setDimensions( xSize ); } -template< typename Real, - typename Device, - typename Index > -String Grid< 1, Real, Device, Index >::getType() -{ - return String( "Meshes::Grid< " ) + - convertToString( getMeshDimension() ) + ", " + - String( TNL::getType< RealType >() ) + ", " + - String( Device::getType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 1, Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > String Grid< 1, Real, Device, Index >::getSerializationType() { - return HostType::getType(); + return String( "Meshes::Grid< " ) + + convertToString( getMeshDimension() ) + ", " + + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h index 61f3c11c0..ecaed0cd4 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D.h +++ b/src/TNL/Meshes/GridDetails/Grid2D.h @@ -57,21 +57,11 @@ class Grid< 2, Real, Device, Index > : public Object */ Grid(); - /** - * \brief See Grid1D::getType(). - */ Grid( const Index xSize, const Index ySize ); // empty destructor is needed only to avoid crappy nvcc warnings ~Grid() {} - static String getType(); - - /** - * \brief See Grid1D::getTypeVirtual(). - */ - String getTypeVirtual() const; - /** * \brief See Grid1D::getSerializationType(). 
*/ diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h index c6c0420b2..259181688 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h @@ -54,29 +54,13 @@ Grid< 2, Real, Device, Index >::Grid( const Index xSize, const Index ySize ) template< typename Real, typename Device, typename Index > -String Grid< 2, Real, Device, Index > :: getType() +String Grid< 2, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( TNL::getType< RealType >() ) + ", " + - String( Device::getType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 2, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 2, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/TNL/Meshes/GridDetails/Grid3D.h b/src/TNL/Meshes/GridDetails/Grid3D.h index 67c752cb1..873cc6872 100644 --- a/src/TNL/Meshes/GridDetails/Grid3D.h +++ b/src/TNL/Meshes/GridDetails/Grid3D.h @@ -62,16 +62,6 @@ class Grid< 3, Real, Device, Index > : public Object // empty destructor is needed only to avoid crappy nvcc warnings ~Grid() {} - /** - * \brief See Grid1D::getType(). - */ - static String getType(); - - /** - * \brief See Grid1D::getTypeVirtual(). - */ - String getTypeVirtual() const; - /** * \brief See Grid1D::getSerializationType(). 
*/ diff --git a/src/TNL/Meshes/GridDetails/Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Grid3D_impl.h index d2d86c8ba..f4707a8ce 100644 --- a/src/TNL/Meshes/GridDetails/Grid3D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid3D_impl.h @@ -68,29 +68,13 @@ Grid< 3, Real, Device, Index >::Grid( const Index xSize, const Index ySize, cons template< typename Real, typename Device, typename Index > -String Grid< 3, Real, Device, Index > :: getType() +String Grid< 3, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( TNL::getType< RealType >() ) + ", " + - String( Device::getType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 3, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 3, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/TNL/Meshes/Mesh.h b/src/TNL/Meshes/Mesh.h index 589a862b9..27d70814c 100644 --- a/src/TNL/Meshes/Mesh.h +++ b/src/TNL/Meshes/Mesh.h @@ -110,10 +110,6 @@ class Mesh using Face = EntityType< getMeshDimension() - 1 >; using Vertex = EntityType< 0 >; - static String getType(); - - virtual String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h b/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h index e14e909a5..80340c62c 100644 --- a/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h +++ b/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h @@ -26,8 +26,8 @@ namespace Meshes { template< typename MeshConfig, typename 
EntityTopology > class MeshEntityReferenceOrientation { - typedef typename MeshTraits< MeshConfig >::LocalIndexType LocalIndexType; - typedef typename MeshTraits< MeshConfig >::GlobalIndexType GlobalIndexType; + typedef typename MeshTraits< MeshConfig >::LocalIndexType LocalIndexType; + typedef typename MeshTraits< MeshConfig >::GlobalIndexType GlobalIndexType; public: typedef EntitySeed< MeshConfig, EntityTopology > SeedType; @@ -45,8 +45,6 @@ class MeshEntityReferenceOrientation this->cornerIdsMap.insert( std::make_pair( referenceCornerIds[i], i ) ); } } - - static String getType(){ return "MeshEntityReferenceOrientation"; }; EntityOrientation createOrientation( const SeedType& seed ) const { @@ -67,4 +65,3 @@ class MeshEntityReferenceOrientation } // namespace Meshes } // namespace TNL - diff --git a/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h b/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h index 8bdd40570..5c7414b42 100644 --- a/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h +++ b/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h @@ -80,11 +80,11 @@ template< typename MeshConfig, typename EntityTopology > String MeshEntity< MeshConfig, Device, EntityTopology >:: -getType() +getSerializationType() { - return String( "MeshEntity< " ) + - MeshConfig::getType() + ", " + - EntityTopology::getType() + " >"; + return String( "MeshEntity<" ) + + TNL::getSerializationType< MeshConfig >() + ", " + + TNL::getSerializationType< EntityTopology >() + ">"; } template< typename MeshConfig, @@ -92,9 +92,9 @@ template< typename MeshConfig, typename EntityTopology > String MeshEntity< MeshConfig, Device, EntityTopology >:: -getTypeVirtual() const +getSerializationTypeVirtual() const { - return this->getType(); + return this->getSerializationType(); } template< typename MeshConfig, @@ -242,17 +242,19 @@ operator=( const MeshEntity< MeshConfig, Device_, Topologies::Vertex >& entity ) template< typename MeshConfig, typename Device > String MeshEntity< MeshConfig, Device, Topologies::Vertex 
>:: -getType() +getSerializationType() { - return String( "MeshEntity< ... >" ); + return String( "MeshEntity<" ) + + TNL::getSerializationType< MeshConfig >() + ", " + + TNL::getSerializationType< Topologies::Vertex >() + ">"; } template< typename MeshConfig, typename Device > String MeshEntity< MeshConfig, Device, Topologies::Vertex >:: -getTypeVirtual() const +getSerializationTypeVirtual() const { - return this->getType(); + return this->getSerializationType(); } template< typename MeshConfig, typename Device > diff --git a/src/TNL/Meshes/MeshDetails/Mesh_impl.h b/src/TNL/Meshes/MeshDetails/Mesh_impl.h index 4fbd3ba07..4b0488b2d 100644 --- a/src/TNL/Meshes/MeshDetails/Mesh_impl.h +++ b/src/TNL/Meshes/MeshDetails/Mesh_impl.h @@ -91,28 +91,12 @@ getMeshDimension() return MeshTraitsType::meshDimension; } -template< typename MeshConfig, typename Device > -String -Mesh< MeshConfig, Device >:: -getType() -{ - return String( "Meshes::Mesh< ") + MeshConfig::getType() + " >"; -} - -template< typename MeshConfig, typename Device > -String -Mesh< MeshConfig, Device >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename MeshConfig, typename Device > String Mesh< MeshConfig, Device >:: getSerializationType() { - return Mesh::getType(); + return String( "Meshes::Mesh< ") + TNL::getType< MeshConfig >() + " >"; } template< typename MeshConfig, typename Device > diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h index 7058691a3..031be1900 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h +++ b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h @@ -68,8 +68,6 @@ class EntityInitializer using InitializerType = Initializer< MeshConfig >; public: - static String getType() { return "EntityInitializer"; }; - static void initEntity( EntityType& entity, const GlobalIndexType& entityIndex, const SeedType& entitySeed, InitializerType& initializer) { 
initializer.setEntityIndex( entity, entityIndex ); @@ -91,8 +89,6 @@ public: using PointType = typename MeshTraits< MeshConfig >::PointType; using InitializerType = Initializer< MeshConfig >; - static String getType() { return "EntityInitializer"; }; - static void initEntity( VertexType& entity, const GlobalIndexType& entityIndex, const PointType& point, InitializerType& initializer) { initializer.setEntityIndex( entity, entityIndex ); diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h b/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h index b9d34d070..41439c405 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h +++ b/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h @@ -40,8 +40,6 @@ class EntitySeed using HashType = EntitySeedHash< EntitySeed >; using KeyEqual = EntitySeedEq< EntitySeed >; - static String getType() { return String( "EntitySeed<>" ); } - static constexpr LocalIndexType getCornersCount() { return SubvertexTraits::count; @@ -82,8 +80,6 @@ class EntitySeed< MeshConfig, Topologies::Vertex > using HashType = EntitySeedHash< EntitySeed >; using KeyEqual = EntitySeedEq< EntitySeed >; - static String getType() { return String( "EntitySeed<>" ); } - static constexpr LocalIndexType getCornersCount() { return 1; diff --git a/src/TNL/Meshes/MeshEntity.h b/src/TNL/Meshes/MeshEntity.h index 6e0970ade..b1c8afea5 100644 --- a/src/TNL/Meshes/MeshEntity.h +++ b/src/TNL/Meshes/MeshEntity.h @@ -72,9 +72,9 @@ class MeshEntity MeshEntity& operator=( const MeshEntity< MeshConfig, Device_, EntityTopology >& entity ); - static String getType(); + static String getSerializationType(); - String getTypeVirtual() const; + String getSerializationTypeVirtual() const; void save( File& file ) const; @@ -166,9 +166,9 @@ class MeshEntity< MeshConfig, Device, Topologies::Vertex > MeshEntity& operator=( const MeshEntity< MeshConfig, Device_, EntityTopology >& entity ); - static String getType(); + static String getSerializationType(); - String 
getTypeVirtual() const; + String getSerializationTypeVirtual() const; void save( File& file ) const; diff --git a/src/TNL/Meshes/Topologies/Edge.h b/src/TNL/Meshes/Topologies/Edge.h index 4c4b8269e..298e638c0 100644 --- a/src/TNL/Meshes/Topologies/Edge.h +++ b/src/TNL/Meshes/Topologies/Edge.h @@ -22,15 +22,10 @@ namespace TNL { namespace Meshes { namespace Topologies { - + struct Edge { static constexpr int dimension = 1; - - static String getType() - { - return "Topologies::Edge"; - } }; diff --git a/src/TNL/Meshes/Topologies/Hexahedron.h b/src/TNL/Meshes/Topologies/Hexahedron.h index db922f3b5..af0765db5 100644 --- a/src/TNL/Meshes/Topologies/Hexahedron.h +++ b/src/TNL/Meshes/Topologies/Hexahedron.h @@ -25,11 +25,6 @@ namespace Topologies { struct Hexahedron { static constexpr int dimension = 3; - - static String getType() - { - return "Topologies::Hexahedron"; - } }; template<> diff --git a/src/TNL/Meshes/Topologies/Quadrilateral.h b/src/TNL/Meshes/Topologies/Quadrilateral.h index 6b5d4eb54..50be274e2 100644 --- a/src/TNL/Meshes/Topologies/Quadrilateral.h +++ b/src/TNL/Meshes/Topologies/Quadrilateral.h @@ -25,11 +25,6 @@ namespace Topologies { struct Quadrilateral { static constexpr int dimension = 2; - - static String getType() - { - return "Topologies::Quadrilateral"; - } }; diff --git a/src/TNL/Meshes/Topologies/Simplex.h b/src/TNL/Meshes/Topologies/Simplex.h index a9cbee72b..3b61f09fd 100644 --- a/src/TNL/Meshes/Topologies/Simplex.h +++ b/src/TNL/Meshes/Topologies/Simplex.h @@ -25,15 +25,9 @@ namespace Meshes { namespace Topologies { template< int dimension_ > -class Simplex +struct Simplex { - public: - static constexpr int dimension = dimension_; - - static String getType() - { - return String( "Topologies::Simplex< " ) + convertToString( dimension ) + " >"; - } + static constexpr int dimension = dimension_; }; namespace SimplexDetails { diff --git a/src/TNL/Meshes/Topologies/Tetrahedron.h b/src/TNL/Meshes/Topologies/Tetrahedron.h index 
7722f5ef6..048daa1c3 100644 --- a/src/TNL/Meshes/Topologies/Tetrahedron.h +++ b/src/TNL/Meshes/Topologies/Tetrahedron.h @@ -25,11 +25,6 @@ namespace Topologies { struct Tetrahedron { static constexpr int dimension = 3; - - static String getType() - { - return "Topologies::Tetrahedron"; - } }; template<> diff --git a/src/TNL/Meshes/Topologies/Triangle.h b/src/TNL/Meshes/Topologies/Triangle.h index 11d1c8a84..efe031059 100644 --- a/src/TNL/Meshes/Topologies/Triangle.h +++ b/src/TNL/Meshes/Topologies/Triangle.h @@ -25,11 +25,6 @@ namespace Topologies { struct Triangle { static constexpr int dimension = 2; - - static String getType() - { - return "Topologies::Triangle"; - } }; diff --git a/src/TNL/Meshes/Topologies/Vertex.h b/src/TNL/Meshes/Topologies/Vertex.h index cff78e37d..f90127624 100644 --- a/src/TNL/Meshes/Topologies/Vertex.h +++ b/src/TNL/Meshes/Topologies/Vertex.h @@ -25,11 +25,6 @@ namespace Topologies { struct Vertex { static constexpr int dimension = 0; - - static String getType() - { - return "Topologies::Vertex"; - } }; } // namespace Topologies diff --git a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h index 7feba3d02..b92148fa9 100644 --- a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h +++ b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h @@ -74,7 +74,7 @@ MeshTypeResolver< Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs... resolveWorldDimension( const Reader& reader, ProblemSetterArgs&&... problemSetterArgs ) { - std::cerr << "The cell topology " << CellTopology::getType() << " is disabled in the build configuration." << std::endl; + std::cerr << "The cell topology " << getType< CellTopology >() << " is disabled in the build configuration." << std::endl; return false; } @@ -334,7 +334,7 @@ MeshTypeResolver< Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs... resolveTerminate( const Reader& reader, ProblemSetterArgs&&... 
problemSetterArgs ) { - std::cerr << "The mesh config type " << TNL::getType< MeshConfig >() << " is disabled in the build configuration for device " << Device::getType() << "." << std::endl; + std::cerr << "The mesh config type " << getType< MeshConfig >() << " is disabled in the build configuration for device " << getType< Device >() << "." << std::endl; return false; } diff --git a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h index 6c6012645..d45016af1 100644 --- a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h +++ b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h @@ -149,7 +149,7 @@ loadMesh( const String& fileName, std::cerr << "I am not able to load the mesh from the file " << fileName << ". " "Perhaps the mesh stored in the file is not supported by the mesh " "passed to the loadMesh function? The mesh type is " - << mesh.getType() << std::endl; + << getType< decltype(mesh) >() << std::endl; return false; } return true; diff --git a/src/TNL/Object.h b/src/TNL/Object.h index 356b91eda..ba4151095 100644 --- a/src/TNL/Object.h +++ b/src/TNL/Object.h @@ -25,8 +25,7 @@ namespace TNL { * \brief Basic class for majority of TNL objects like matrices, meshes, grids, solvers, etc.. * * Objects like numerical meshes, matrices large vectors etc. are inherited by - * this class. This class introduces virtual method \ref getType which is - * supposed to tell the object type in a C++ style. + * this class. * * Since the virtual destructor is not defined as \ref __cuda_callable__, * objects inherited from Object should not be created in CUDA kernels. @@ -42,28 +41,6 @@ class Object { public: - /** - * \brief Static type getter. - * - * Returns the type in C++ style - for example the returned value - * may look as \c "Array< double, Devices::Cuda, int >". 
- * - * \par Example - * \include ObjectExample_getType.cpp - * \par Output - * \include ObjectExample_getType.out - */ - static String getType(); - - /*** - * \brief Virtual type getter. - * - * Returns the type in C++ style - for example the returned value - * may look as \c "Array< double, Devices::Cuda, int >". - * See example at \ref Object::getType. - */ - virtual String getTypeVirtual() const; - /** * \brief Static serialization type getter. * @@ -71,7 +48,6 @@ class Object * is supposed to return the object type but with the device type replaced * by Devices::Host. For example \c Array< double, Devices::Cuda > is * saved as \c Array< double, Devices::Host >. - * See example at \ref Object::getType. */ static String getSerializationType(); @@ -82,7 +58,6 @@ class Object * is supposed to return the object type but with the device type replaced * by Devices::Host. For example \c Array< double, Devices::Cuda > is * saved as \c Array< double, Devices::Host >. - * See example at \ref Object::getType. 
*/ virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Object.hpp b/src/TNL/Object.hpp index eeec8bf98..25c709212 100644 --- a/src/TNL/Object.hpp +++ b/src/TNL/Object.hpp @@ -20,16 +20,6 @@ namespace TNL { static constexpr char magic_number[] = "TNLMN"; -inline String Object::getType() -{ - return String( "Object" ); -} - -inline String Object::getTypeVirtual() const -{ - return this->getType(); -} - inline String Object::getSerializationType() { return String( "Object" ); diff --git a/src/TNL/Operators/Advection/LaxFridrichs.h b/src/TNL/Operators/Advection/LaxFridrichs.h index d1fbd399e..45a8abae7 100644 --- a/src/TNL/Operators/Advection/LaxFridrichs.h +++ b/src/TNL/Operators/Advection/LaxFridrichs.h @@ -61,8 +61,6 @@ class LaxFridrichs< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -153,8 +151,6 @@ class LaxFridrichs< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -251,8 +247,6 @@ class LaxFridrichs< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; diff --git a/src/TNL/Operators/Advection/Upwind.h b/src/TNL/Operators/Advection/Upwind.h index e41768e57..942ec2956 100644 --- a/src/TNL/Operators/Advection/Upwind.h +++ b/src/TNL/Operators/Advection/Upwind.h @@ -61,8 +61,6 @@ class Upwind< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, Veloc return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -154,8 +152,6 @@ class Upwind< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 
Real, Index, Veloc return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -256,8 +252,6 @@ class Upwind< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, Veloc return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; diff --git a/src/TNL/Operators/ExactFunctionInverseOperator.h b/src/TNL/Operators/ExactFunctionInverseOperator.h index 7833d0544..6fe0b2f12 100644 --- a/src/TNL/Operators/ExactFunctionInverseOperator.h +++ b/src/TNL/Operators/ExactFunctionInverseOperator.h @@ -25,12 +25,6 @@ class ExactFunctionInverseOperator { public: - static String getType() - { - return String( "ExactFunctionInverseOperator< " ) + - String( Dimension) + " >"; - } - InnerOperator& getInnerOperator() { return this->innerOperator; diff --git a/src/TNL/Operators/ExactIdentityOperator.h b/src/TNL/Operators/ExactIdentityOperator.h index 7c39938df..22d7bbdd8 100644 --- a/src/TNL/Operators/ExactIdentityOperator.h +++ b/src/TNL/Operators/ExactIdentityOperator.h @@ -23,12 +23,6 @@ class ExactIdentityOperator { public: - static String getType() - { - return String( "ExactIdentityOperator< " ) + - String( Dimension) + " >"; - } - template< typename Function > __cuda_callable__ typename Function::RealType diff --git a/src/TNL/Operators/FunctionInverseOperator.h b/src/TNL/Operators/FunctionInverseOperator.h index 1265f3b70..5bfb5c37f 100644 --- a/src/TNL/Operators/FunctionInverseOperator.h +++ b/src/TNL/Operators/FunctionInverseOperator.h @@ -36,11 +36,6 @@ class FunctionInverseOperator FunctionInverseOperator( const OperatorType& operator_ ) : operator_( operator_ ) {}; - static String getType() - { - return String( "FunctionInverseOperator< " ) + OperatorType::getType() + " >"; - } - const OperatorType& getOperator() const { return this->operator_; } template< typename MeshFunction, diff --git 
a/src/TNL/Operators/diffusion/ExactLinearDiffusion.h b/src/TNL/Operators/diffusion/ExactLinearDiffusion.h index 790fa0777..f0a927d34 100644 --- a/src/TNL/Operators/diffusion/ExactLinearDiffusion.h +++ b/src/TNL/Operators/diffusion/ExactLinearDiffusion.h @@ -32,8 +32,6 @@ class ExactLinearDiffusion< 1 > : public Functions::Domain< 1, Functions::SpaceD static const int Dimension = 1; - static String getType(); - template< typename Function > __cuda_callable__ inline typename Function::RealType operator()( const Function& function, @@ -47,8 +45,6 @@ class ExactLinearDiffusion< 2 > : public Functions::Domain< 2, Functions::SpaceD public: static const int Dimension = 2; - - static String getType(); template< typename Function > __cuda_callable__ inline @@ -63,8 +59,6 @@ class ExactLinearDiffusion< 3 > : public Functions::Domain< 3 > public: static const int Dimension = 3; - - static String getType(); template< typename Function > __cuda_callable__ inline diff --git a/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h b/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h index 0aabb1027..60a27d9c0 100644 --- a/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h @@ -19,13 +19,6 @@ namespace TNL { namespace Operators { -String -ExactLinearDiffusion< 1 >:: -getType() -{ - return "ExactLinearDiffusion< 1 >"; -} - template< typename Function > __cuda_callable__ inline typename Function::RealType @@ -37,13 +30,6 @@ operator()( const Function& function, return function.template getPartialDerivative< 2, 0, 0 >( v, time ); } -String -ExactLinearDiffusion< 2 >:: -getType() -{ - return "ExactLinearDiffusion< 2 >"; -} - template< typename Function > __cuda_callable__ inline typename Function::RealType @@ -56,13 +42,6 @@ operator()( const Function& function, function.template getPartialDerivative< 0, 2, 0 >( v, time ); } -String -ExactLinearDiffusion< 3 >:: -getType() -{ - return "ExactLinearDiffusion< 3 >"; 
-} - template< typename Function > __cuda_callable__ inline typename Function::RealType diff --git a/src/TNL/Operators/diffusion/ExactMeanCurvature.h b/src/TNL/Operators/diffusion/ExactMeanCurvature.h index fbc2260ef..a96d5a090 100644 --- a/src/TNL/Operators/diffusion/ExactMeanCurvature.h +++ b/src/TNL/Operators/diffusion/ExactMeanCurvature.h @@ -34,13 +34,6 @@ class ExactMeanCurvature typedef ExactFunctionInverseOperator< Dimension, ExactGradientNormType > FunctionInverse; typedef ExactNonlinearDiffusion< Dimension, FunctionInverse > NonlinearDiffusion; - static String getType() - { - return String( "ExactMeanCurvature< " ) + - String( Dimension) + ", " + - InnerOperator::getType() + " >"; - } - template< typename Real > void setRegularizationEpsilon( const Real& eps) { diff --git a/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h b/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h index 25381e2bb..826796751 100644 --- a/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h +++ b/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h @@ -37,11 +37,6 @@ class ExactNonlinearDiffusion< 1, Nonlinearity, InnerOperator > { public: - static String getType() - { - return "ExactNonlinearDiffusion< 1, " + Nonlinearity::getType() + " >"; - }; - Nonlinearity& getNonlinearity() { return this->nonlinearity; @@ -91,11 +86,6 @@ class ExactNonlinearDiffusion< 2, Nonlinearity, InnerOperator > { public: - static String getType() - { - return "ExactNonlinearDiffusion< " + Nonlinearity::getType() + ", 2 >"; - }; - Nonlinearity& getNonlinearity() { return this->nonlinearity; @@ -150,11 +140,6 @@ class ExactNonlinearDiffusion< 3, Nonlinearity, InnerOperator > { public: - static String getType() - { - return "ExactNonlinearDiffusion< " + Nonlinearity::getType() + ", 3 >"; - } - Nonlinearity& getNonlinearity() { return this->nonlinearity; diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h index 
efb175555..e98d21c11 100644 --- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h +++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h @@ -49,8 +49,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 1,MeshReal, Device, MeshIndex typedef Index IndexType; typedef OperatorQ OperatorQType; - static String getType(); - template< typename MeshEntity, typename Vector > __cuda_callable__ @@ -102,8 +100,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 2, MeshReal, Device, MeshInde typedef OperatorQ OperatorQType; - static String getType(); - template< typename MeshEntity, typename Vector > __cuda_callable__ @@ -155,8 +151,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 3, MeshReal, Device, MeshInde typedef Index IndexType; typedef OperatorQ OperatorQType; - static String getType(); - template< typename MeshEntity, typename Vector > __cuda_callable__ diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h index 083160467..3d496bd52 100644 --- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h +++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h @@ -21,23 +21,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename OperatorQ > -String -FiniteVolumeNonlinearOperator< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -getType() -{ - return String( "FiniteVolumeNonlinearOperator< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", " + - OperatorQ::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -98,23 +81,6 @@ setMatrixElements( const RealType& time, typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename 
Real, - typename Index, - typename OperatorQ > -String -FiniteVolumeNonlinearOperator< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -getType() -{ - return String( "FiniteVolumeNonlinearOperator< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", " + - OperatorQ::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -201,23 +167,6 @@ setMatrixElements( const RealType& time, matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 0, 1 >(), eCoef ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename OperatorQ > -String -FiniteVolumeNonlinearOperator< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -getType() -{ - return String( "FiniteVolumeNonlinearOperator< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", " + - OperatorQ::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/diffusion/LinearDiffusion.h b/src/TNL/Operators/diffusion/LinearDiffusion.h index e31113800..33e493d02 100644 --- a/src/TNL/Operators/diffusion/LinearDiffusion.h +++ b/src/TNL/Operators/diffusion/LinearDiffusion.h @@ -55,8 +55,6 @@ class LinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, Inde static const int Dimension = MeshType::getMeshDimension(); static constexpr int getMeshDimension() { return Dimension; } - - static String getType(); template< typename PreimageFunction, typename MeshEntity > @@ -107,8 +105,6 @@ class LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind static constexpr int getMeshDimension() { return Dimension; } - static String getType(); - template< typename PreimageFunction, typename EntityType > __cuda_callable__ inline Real operator()( const PreimageFunction& u, @@ -157,8 +153,6 @@ class 
LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Ind static constexpr int getMeshDimension() { return Dimension; } - static String getType(); - template< typename PreimageFunction, typename EntityType > __cuda_callable__ diff --git a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h index 83a20829c..51bdf8a62 100644 --- a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h @@ -22,21 +22,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LinearDiffusion< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -109,21 +94,6 @@ setMatrixElements( const PreimageFunction& u, matrixRow.setElement( 2, neighborEntities.template getEntityIndex< 1 >(), - lambdaX ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -203,22 +173,6 @@ setMatrixElements( const PreimageFunction& u, matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 0, 1 >(), -lambdaY ); } - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LinearDiffusion< " ) + - MeshType::getType() + 
", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h b/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h index 0dbc26988..97f9ec2be 100644 --- a/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h @@ -23,23 +23,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename NonlinearDiffusionOperator > -String -NonlinearDiffusion< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -getType() -{ - return String( "NonlinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + "," + - NonlinearDiffusionOperator::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -100,23 +83,6 @@ setMatrixElements( const RealType& time, nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename NonlinearDiffusionOperator > -String -NonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -getType() -{ - return String( "NonlinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + "," + - NonlinearDiffusionOperator::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -177,23 +143,6 @@ setMatrixElements( const RealType& time, nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename 
NonlinearDiffusionOperator > -String -NonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -getType() -{ - return String( "NonlinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + "," + - NonlinearDiffusionOperator::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h index 6e99d2f05..0c8767981 100644 --- a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h +++ b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h @@ -55,14 +55,6 @@ class OneSidedMeanCurvature nonlinearity( nonlinearityOperator, nonlinearityBoundaryConditions, meshPointer ), nonlinearDiffusion( nonlinearity ){} - static String getType() - { - return String( "OneSidedMeanCurvature< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setRegularizationEpsilon( const RealType& eps ) { this->gradientNorm.setEps( eps ); diff --git a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h index b74abe0b0..9691ab32b 100644 --- a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h +++ b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h @@ -52,15 +52,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity ) : nonlinearity( nonlinearity ){} - - static String getType() - { - return String( "OneSidedNonlinearDiffusion< " ) + - MeshType::getType() + ", " + - Nonlinearity::getType() + "," + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > @@ -146,15 +137,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex > 
OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity ) : nonlinearity( nonlinearity ){} - - static String getType() - { - return String( "OneSidedNonlinearDiffusion< " ) + - MeshType::getType() + ", " + - Nonlinearity::getType() + "," + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > @@ -255,15 +237,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex > OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity ) : nonlinearity( nonlinearity ){} - - static String getType() - { - return String( "OneSidedNonlinearDiffusion< " ) + - MeshType::getType() + ", " + - Nonlinearity::getType() + "," + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > diff --git a/src/TNL/Operators/euler/fvm/LaxFridrichs.h b/src/TNL/Operators/euler/fvm/LaxFridrichs.h index aaf44f521..10d1c7995 100644 --- a/src/TNL/Operators/euler/fvm/LaxFridrichs.h +++ b/src/TNL/Operators/euler/fvm/LaxFridrichs.h @@ -42,8 +42,6 @@ class LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, Pressu LaxFridrichs(); - static String getType(); - void getExplicitUpdate( const IndexType centralVolume, RealType& rho_t, RealType& rho_u1_t, diff --git a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h index aa9c8059a..963ef7601 100644 --- a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h +++ b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h @@ -27,19 +27,6 @@ LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, { } -template< typename Real, - typename Device, - typename Index, - typename PressureGradient, - template< int, typename, typename, typename > class GridGeometry > -String LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, - PressureGradient > :: getType() -{ - return String( "LaxFridrichs< " ) + - Meshes::Grid< 2, Real, 
Device, Index, GridGeometry > :: getType() + ", " + - PressureGradient :: getType() + " >"; -} - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Operators/fdm/BackwardFiniteDifference.h b/src/TNL/Operators/fdm/BackwardFiniteDifference.h index cceaa807f..16282c73e 100644 --- a/src/TNL/Operators/fdm/BackwardFiniteDifference.h +++ b/src/TNL/Operators/fdm/BackwardFiniteDifference.h @@ -50,17 +50,6 @@ class BackwardFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, M static constexpr int getMeshDimension() { return Dimension; } - static String getType() - { - return String( "BackwardFiniteDifference< " ) + - MeshType::getType() + ", " + - String( XDifference ) + ", " + - String( YDifference ) + ", " + - String( ZDifference ) + ", " + - TNL::getType< RealType >() + ", " + - TNL::getType< IndexType >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ inline Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/fdm/CentralFiniteDifference.h b/src/TNL/Operators/fdm/CentralFiniteDifference.h index feecc62e7..51a645be1 100644 --- a/src/TNL/Operators/fdm/CentralFiniteDifference.h +++ b/src/TNL/Operators/fdm/CentralFiniteDifference.h @@ -50,18 +50,6 @@ class CentralFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, Me //static constexpr int getMeshDimension() { return Dimension; } - static String getType() - { - return String( "CentralFiniteDifference< " ) + - MeshType::getType() + ", " + - String( XDifference ) + ", " + - String( YDifference ) + ", " + - String( ZDifference ) + ", " + - TNL::getType< RealType >() + ", " + - TNL::getType< IndexType >() + " >"; - } - - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ inline Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/fdm/ExactDifference.h b/src/TNL/Operators/fdm/ExactDifference.h index 5efffc1b8..58c693644 100644 --- a/src/TNL/Operators/fdm/ExactDifference.h +++ 
b/src/TNL/Operators/fdm/ExactDifference.h @@ -22,15 +22,6 @@ class ExactDifference { public: - static String getType() - { - return String( "ExactDifference< " ) + - String( Dimension ) + ", " + - String( XDerivative ) + ", " + - String( YDerivative ) + ", " + - String( ZDerivative ) + " >"; - } - template< typename Function > __cuda_callable__ typename Function::RealType operator()( diff --git a/src/TNL/Operators/fdm/ForwardFiniteDifference.h b/src/TNL/Operators/fdm/ForwardFiniteDifference.h index 53602afec..bbfe29bc1 100644 --- a/src/TNL/Operators/fdm/ForwardFiniteDifference.h +++ b/src/TNL/Operators/fdm/ForwardFiniteDifference.h @@ -51,18 +51,6 @@ class ForwardFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, Me static constexpr int getMeshDimension() { return Dimension; } - static String getType() - { - return String( "ForwardFiniteDifference< " ) + - MeshType::getType() + ", " + - String( XDifference ) + ", " + - String( YDifference ) + ", " + - String( ZDifference ) + ", " + - TNL::getType< RealType >() + ", " + - TNL::getType< IndexType >() + " >"; - } - - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ inline Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/geometric/CoFVMGradientNorm.h b/src/TNL/Operators/geometric/CoFVMGradientNorm.h index 2af779a11..37fabed7d 100644 --- a/src/TNL/Operators/geometric/CoFVMGradientNorm.h +++ b/src/TNL/Operators/geometric/CoFVMGradientNorm.h @@ -58,15 +58,6 @@ class CoFVMGradientNorm< Meshes::Grid< MeshDimension, MeshReal, Device, MeshInde : BaseType( outerOperator, innerOperator, mesh ) {} - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", " + - String( MeshDimension ) + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setEps( const RealType& eps ) { this->getInnerOperator().setEps( eps ); @@ -100,14 +91,6 @@ class CoFVMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex 
>, 0, Real, CoFVMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", 0, " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -160,16 +143,6 @@ class CoFVMGradientNorm< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real CoFVMGradientNorm() : epsSquare( 0.0 ){} - - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", 1, " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -278,14 +251,6 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real CoFVMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", 2, " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/geometric/ExactGradientNorm.h b/src/TNL/Operators/geometric/ExactGradientNorm.h index cf7e33848..121e0181b 100644 --- a/src/TNL/Operators/geometric/ExactGradientNorm.h +++ b/src/TNL/Operators/geometric/ExactGradientNorm.h @@ -30,11 +30,6 @@ class ExactGradientNorm< 1, Real > { public: - static String getType() - { - return "ExactGradientNorm< 1 >"; - } - ExactGradientNorm() : epsilonSquare( 0.0 ){}; @@ -98,11 +93,6 @@ class ExactGradientNorm< 2, Real > { public: - static String getType() - { - return "ExactGradientNorm< 2 >"; - } - ExactGradientNorm() : epsilonSquare( 0.0 ){}; @@ -172,11 +162,6 @@ class ExactGradientNorm< 3, Real > { public: - static String getType() - { - return "ExactGradientNorm< 3 >"; - } - ExactGradientNorm() : 
epsilonSquare( 0.0 ){}; diff --git a/src/TNL/Operators/geometric/FDMGradientNorm.h b/src/TNL/Operators/geometric/FDMGradientNorm.h index a5eb45363..f42216a43 100644 --- a/src/TNL/Operators/geometric/FDMGradientNorm.h +++ b/src/TNL/Operators/geometric/FDMGradientNorm.h @@ -50,14 +50,6 @@ class FDMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Difference FDMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "FDMGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -107,16 +99,6 @@ class FDMGradientNorm< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Difference FDMGradientNorm() : epsSquare( 0.0 ){} - - static String getType() - { - return String( "FDMGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -173,14 +155,6 @@ class FDMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Differenc FDMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "FDMGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h index 2d86167b1..a1624b408 100644 --- a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h +++ b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h @@ -46,14 +46,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, TwoSidedGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { 
- return String( "TwoSidedGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -99,16 +91,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, TwoSidedGradientNorm() : epsSquare( 0.0 ){} - - static String getType() - { - return String( "TwoSidedGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -162,14 +144,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real TwoSidedGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "TwoSidedGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h index 7f145198f..bfe41697f 100644 --- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h +++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h @@ -42,8 +42,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > IndexType bind( Vector& u) { return 0; } @@ -96,8 +94,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > IndexType bind( Vector& u) { return 0; } @@ -149,8 +145,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 
3,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > IndexType bind( Vector& u) { return 0; } @@ -202,8 +196,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > Index bind( Vector& u); @@ -255,8 +247,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; typedef SharedVector< RealType, DeviceType, IndexType > DofVectorType; - - static String getType(); template< typename Vector > Index bind( Vector& u); @@ -309,8 +299,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, R typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; - - static String getType(); template< typename Vector > Index bind( Vector& u); diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h index 0fae70006..184f1955d 100644 --- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h +++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h @@ -16,36 +16,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, 0 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 0 >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, 1 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - 
MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 1 >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -165,36 +135,6 @@ operator()( return 0.0; } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, 0 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 0 >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, 1 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 1 >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -372,36 +312,6 @@ operator()( const MeshType& mesh, return q.getElement( entity.getIndex() ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, 0 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 0 >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, 1 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 1 >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git 
a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h index a96d22f51..aff3917c3 100644 --- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h +++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h @@ -37,8 +37,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Re typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -74,8 +72,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Re typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -112,8 +108,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h index 21f5e44f0..6291e0a53 100644 --- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h +++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h @@ -16,21 +16,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlOneSideDiffOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "tnlOneSideDiffOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -92,21 +77,6 @@ getValueStriped( const 
MeshFunction& u, /*** * 2D */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlOneSideDiffOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "tnlOneSideDiffOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -177,21 +147,6 @@ getValueStriped( const MeshFunction& u, /*** * 3D */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlOneSideDiffOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "tnlOneSideDiffOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h index 33a20e255..f3a73c88a 100644 --- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h +++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h @@ -29,8 +29,6 @@ class ExactOperatorCurvature< OperatorQ, 1 > enum { Dimension = 1 }; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType > __cuda_callable__ static Real getValue( const Function& function, @@ -46,8 +44,6 @@ class ExactOperatorCurvature< ExactOperatorQ, 2 > enum { Dimension = 2 }; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType > __cuda_callable__ static Real getValue( const Function& function, @@ -62,8 +58,6 @@ class 
ExactOperatorCurvature< ExactOperatorQ, 3 > enum { Dimension = 3 }; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType > __cuda_callable__ static Real getValue( const Function& function, diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h index dfc03a727..8408cfe17 100644 --- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h +++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h @@ -15,14 +15,6 @@ namespace TNL { namespace Operators { -template< typename ExactOperatorQ > -String -ExactOperatorCurvature< ExactOperatorQ, 1 >:: -getType() -{ - return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",1 >"; -} - template< typename OperatorQ > template< int XDiffOrder, int YDiffOrder, int ZDiffOrder, typename Function, typename Point, typename Real > __cuda_callable__ @@ -41,14 +33,6 @@ getValue( const Function& function, return 0; } -template< typename ExactOperatorQ > -String -ExactOperatorCurvature< ExactOperatorQ, 2 >:: -getType() -{ - return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",2 >"; -} - template< int XDiffOrder, int YDiffOrder, int ZDiffOrder, typename Function, typename Point, typename Real > __cuda_callable__ Real @@ -68,13 +52,5 @@ getValue( const Function& function, return 0; } -template< typename ExactOperatorQ > -String -ExactOperatorCurvature< ExactOperatorQ, 3 >:: -getType() -{ - return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",3 >"; -} - } // namespace Operators } // namespace TNL diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index f384c75e5..136c809cd 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -16,6 +16,7 @@ #include #include #include +#include #include // std::memcpy, 
std::memcmp @@ -481,8 +482,8 @@ struct Formatter< Pointers::DevicePointer< Object, Device > > printToString( const Pointers::DevicePointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(DevicePointer< " << Object::getType() << ", " << Device::getType() - << " > object at " << &value << ")"; + ss << "(" + getType< Pointers::DevicePointer< Object, Device > >() + << " object at " << &value << ")"; return ss.str(); } }; diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h index 05383c39e..93f63f807 100644 --- a/src/TNL/Pointers/SharedPointer.h +++ b/src/TNL/Pointers/SharedPointer.h @@ -15,28 +15,10 @@ #include #include #include +#include //#define TNL_DEBUG_SHARED_POINTERS -#ifdef TNL_DEBUG_SHARED_POINTERS - #include - #include - #include - #include - #include - #include - - inline - std::string demangle(const char* mangled) - { - int status; - std::unique_ptr result( - abi::__cxa_demangle(mangled, 0, 0, &status), std::free); - return result.get() ? 
std::string(result.get()) : "error occurred"; - } -#endif - - namespace TNL { namespace Pointers { @@ -59,7 +41,7 @@ struct Formatter< Pointers::SharedPointer< Object, Device > > printToString( const Pointers::SharedPointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(SharedPointer< " << Object::getType() << ", " << Device::getType() + ss << "(" + getType< Pointers::SharedPointer< Object, Device > >() << " > object at " << &value << ")"; return ss.str(); } diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h index 2cf1b297f..9c883c23a 100644 --- a/src/TNL/Pointers/SharedPointerCuda.h +++ b/src/TNL/Pointers/SharedPointerCuda.h @@ -21,27 +21,6 @@ #include // std::nullptr_t #include // swap -//#define TNL_DEBUG_SHARED_POINTERS - -#ifdef TNL_DEBUG_SHARED_POINTERS - #include - #include - #include - #include - #include - #include - - inline - std::string demangle(const char* mangled) - { - int status; - std::unique_ptr result( - abi::__cxa_demangle(mangled, 0, 0, &status), std::free); - return result.get() ? std::string(result.get()) : "error occurred"; - } -#endif - - namespace TNL { namespace Pointers { @@ -78,7 +57,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer : pd( nullptr ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl; #endif this->allocate( args... ); } @@ -119,7 +98,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif if( ! this->counter ) return this->allocate( args... 
); @@ -377,7 +356,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif if( ! this->pd ) return this->allocate( args... ); @@ -478,7 +457,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->pd != nullptr ) this->pd->counter += 1; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -494,7 +473,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->pd != nullptr ) this->pd->counter += 1; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -508,7 +487,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer ptr.pd = nullptr; ptr.cuda_pointer = nullptr; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -524,7 +503,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer ptr.pd = nullptr; ptr.cuda_pointer = nullptr; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << 
"Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -537,7 +516,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->modified() ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl; + std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; std::cerr << " ( " << sizeof( Object ) << " bytes, CUDA adress " << this->cuda_pointer << " )" << std::endl; #endif TNL_ASSERT( this->cuda_pointer, ); @@ -595,7 +574,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer // set last-sync state this->set_last_sync_state(); #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; + std::cerr << "Created shared pointer to " << getType< ObjectType >() << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; #endif Devices::Cuda::insertSmartPointer( this ); return true; @@ -622,7 +601,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->pd ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << getType< ObjectType >() << std::endl; #endif if( ! 
--this->pd->counter ) { diff --git a/src/TNL/Pointers/SharedPointerHost.h b/src/TNL/Pointers/SharedPointerHost.h index 48d83c938..087cfd79e 100644 --- a/src/TNL/Pointers/SharedPointerHost.h +++ b/src/TNL/Pointers/SharedPointerHost.h @@ -54,7 +54,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer : pd( nullptr ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl; #endif this->allocate( args... ); } @@ -95,7 +95,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif if( ! this->counter ) return this->allocate( args... ); diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index a2a8551ec..53f2dac5b 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -322,7 +322,7 @@ struct Formatter< Pointers::UniquePointer< Object, Device > > printToString( const Pointers::UniquePointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(UniquePointer< " << Object::getType() << ", " << Device::getType() + ss << "(" + getType< Pointers::UniquePointer< Object, Device > >() << " > object at " << &value << ")"; return ss.str(); } diff --git a/src/TNL/Problems/HeatEquationEocProblem.h b/src/TNL/Problems/HeatEquationEocProblem.h index 51990252c..78dd640b3 100644 --- a/src/TNL/Problems/HeatEquationEocProblem.h +++ b/src/TNL/Problems/HeatEquationEocProblem.h @@ -36,8 +36,6 @@ class HeatEquationEocProblem : public HeatEquationProblem< Mesh, BoundaryConditi using typename BaseType::MeshPointer; - static String getType(); - bool setup( const 
Config::ParameterContainer& parameters, const String& prefix ); }; diff --git a/src/TNL/Problems/HeatEquationEocProblem_impl.h b/src/TNL/Problems/HeatEquationEocProblem_impl.h index ae062df74..f7c7aea5c 100644 --- a/src/TNL/Problems/HeatEquationEocProblem_impl.h +++ b/src/TNL/Problems/HeatEquationEocProblem_impl.h @@ -20,19 +20,7 @@ #include "HeatEquationProblem.h" namespace TNL { -namespace Problems { - -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -HeatEquationEocProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "heatEquationEocSolver< " ) + Mesh :: getType() + " >"; -} +namespace Problems { template< typename Mesh, typename BoundaryCondition, diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h index cddd70746..26df28965 100644 --- a/src/TNL/Problems/HeatEquationProblem.h +++ b/src/TNL/Problems/HeatEquationProblem.h @@ -62,8 +62,6 @@ class HeatEquationProblem : public PDEProblem< Mesh, typedef Communicator CommunicatorType; - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h index 64b4a2ca9..bc339e9b3 100644 --- a/src/TNL/Problems/HeatEquationProblem_impl.h +++ b/src/TNL/Problems/HeatEquationProblem_impl.h @@ -27,18 +27,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "HeatEquationProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git 
a/src/TNL/Problems/MeanCurvatureFlowEocProblem.h b/src/TNL/Problems/MeanCurvatureFlowEocProblem.h index e50afe7fa..7839dd8dd 100644 --- a/src/TNL/Problems/MeanCurvatureFlowEocProblem.h +++ b/src/TNL/Problems/MeanCurvatureFlowEocProblem.h @@ -33,8 +33,6 @@ class MeanCurvatureFlowEocProblem : public MeanCurvatureFlowProblem< Mesh, Bound { public: - static String getType(); - bool setup( const Config::ParameterContainer& parameters ); }; diff --git a/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h index 593028b37..71809f3cd 100644 --- a/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h +++ b/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h @@ -19,17 +19,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -String -MeanCurvatureFlowEocProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getType() -{ - return String( "HeatEquationEocProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, @@ -47,4 +36,4 @@ setup( const Config::ParameterContainer& parameters ) } } // namespace Problems -} // namespace TNL \ No newline at end of file +} // namespace TNL diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem.h b/src/TNL/Problems/MeanCurvatureFlowProblem.h index 45e87025b..415216dce 100644 --- a/src/TNL/Problems/MeanCurvatureFlowProblem.h +++ b/src/TNL/Problems/MeanCurvatureFlowProblem.h @@ -54,8 +54,6 @@ class MeanCurvatureFlowProblem : public PDEProblem< Mesh, using typename BaseType::MeshDependentDataType; using typename BaseType::MeshDependentDataPointer; - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h index 2cd7f9aaf..48807addf 100644 --- 
a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h +++ b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h @@ -30,17 +30,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -String -MeanCurvatureFlowProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getType() -{ - return String( "tnlMeanCurvativeFlowProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h index 51e56d44a..69d95aaee 100644 --- a/src/TNL/Problems/PDEProblem.h +++ b/src/TNL/Problems/PDEProblem.h @@ -50,8 +50,6 @@ class PDEProblem : public Problem< Real, Device, Index > * This means that the time stepper will be set from the command line arguments. */ typedef void TimeStepper; - - static String getType(); String getPrologHeader() const; diff --git a/src/TNL/Problems/PDEProblem_impl.h b/src/TNL/Problems/PDEProblem_impl.h index ba7612de7..6a3aa63e6 100644 --- a/src/TNL/Problems/PDEProblem_impl.h +++ b/src/TNL/Problems/PDEProblem_impl.h @@ -16,22 +16,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename Communicator, - typename Real, - typename Device, - typename Index > -String -PDEProblem< Mesh, Communicator, Real, Device, Index >:: -getType() -{ - return String( "PDEProblem< " ) + - Mesh::getType() + ", " + - TNL::getType< Real >() + ", " + - Device::getType() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename Mesh, typename Communicator, typename Real, diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h index e725eb67f..1ea084f4f 100644 --- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h +++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h @@ -34,8 +34,6 @@ class NavierStokesSolver NavierStokesSolver(); 
- static String getType(); - void setAdvectionScheme( AdvectionSchemeType& advection ); void setDiffusionScheme( DiffusionSchemeType& u1Viscosity, @@ -148,4 +146,4 @@ class NavierStokesSolver } // namespace TNL -#include \ No newline at end of file +#include diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h index a12ec5cb1..a26693886 100644 --- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h +++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h @@ -29,16 +29,6 @@ NavierStokesSolver< AdvectionScheme, DiffusionScheme, BoundaryConditions >::Navi { } -template< typename AdvectionScheme, - typename DiffusionScheme, - typename BoundaryConditions > -String NavierStokesSolver< AdvectionScheme, DiffusionScheme, BoundaryConditions >::getType() -{ - return String( "NavierStokesSolver< " ) + - AdvectionScheme::getType() + ", " + - DiffusionScheme::getType() + " >"; -} - template< typename AdvectionScheme, typename DiffusionScheme, typename BoundaryConditions > diff --git a/src/TNL/Solvers/Linear/BICGStab.h b/src/TNL/Solvers/Linear/BICGStab.h index 686d6f450..2cede824a 100644 --- a/src/TNL/Solvers/Linear/BICGStab.h +++ b/src/TNL/Solvers/Linear/BICGStab.h @@ -28,8 +28,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/Linear/BICGStabL.h b/src/TNL/Solvers/Linear/BICGStabL.h index a35962d54..f2481b588 100644 --- a/src/TNL/Solvers/Linear/BICGStabL.h +++ b/src/TNL/Solvers/Linear/BICGStabL.h @@ -65,8 +65,6 @@ public: using ConstVectorViewType = typename Base::ConstVectorViewType; using VectorType = typename Traits::VectorType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff 
--git a/src/TNL/Solvers/Linear/BICGStabL_impl.h b/src/TNL/Solvers/Linear/BICGStabL_impl.h index 1f20d4a30..3f41e5115 100644 --- a/src/TNL/Solvers/Linear/BICGStabL_impl.h +++ b/src/TNL/Solvers/Linear/BICGStabL_impl.h @@ -20,16 +20,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String -BICGStabL< Matrix >:: -getType() const -{ - return String( "BICGStabL< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void BICGStabL< Matrix >:: diff --git a/src/TNL/Solvers/Linear/BICGStab_impl.h b/src/TNL/Solvers/Linear/BICGStab_impl.h index 735358622..baa4b6363 100644 --- a/src/TNL/Solvers/Linear/BICGStab_impl.h +++ b/src/TNL/Solvers/Linear/BICGStab_impl.h @@ -18,14 +18,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String BICGStab< Matrix > :: getType() const -{ - return String( "BICGStab< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void BICGStab< Matrix >:: diff --git a/src/TNL/Solvers/Linear/CG.h b/src/TNL/Solvers/Linear/CG.h index b87caf247..375db25cb 100644 --- a/src/TNL/Solvers/Linear/CG.h +++ b/src/TNL/Solvers/Linear/CG.h @@ -30,8 +30,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - bool solve( ConstVectorViewType b, VectorViewType x ) override; protected: diff --git a/src/TNL/Solvers/Linear/CG_impl.h b/src/TNL/Solvers/Linear/CG_impl.h index 07f8ea197..9c1b0458a 100644 --- a/src/TNL/Solvers/Linear/CG_impl.h +++ b/src/TNL/Solvers/Linear/CG_impl.h @@ -16,14 +16,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String CG< Matrix > :: getType() const -{ - return String( "CG< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > bool CG< 
Matrix >:: diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h index dd72e2832..e1eb5e1cb 100644 --- a/src/TNL/Solvers/Linear/GMRES.h +++ b/src/TNL/Solvers/Linear/GMRES.h @@ -37,8 +37,6 @@ public: using ConstVectorViewType = typename Base::ConstVectorViewType; using VectorType = typename Traits::VectorType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index 5d7942f84..519fcb9aa 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -24,16 +24,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String -GMRES< Matrix >:: -getType() const -{ - return String( "GMRES< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void GMRES< Matrix >:: diff --git a/src/TNL/Solvers/Linear/Jacobi.h b/src/TNL/Solvers/Linear/Jacobi.h index 528872671..e4e74d5df 100644 --- a/src/TNL/Solvers/Linear/Jacobi.h +++ b/src/TNL/Solvers/Linear/Jacobi.h @@ -29,11 +29,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const - { - return String( "Jacobi< " ) + this->matrix->getType() + ", " + this->preconditioner->getType() + " >"; - } - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ) { diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h index 25aa1cd7c..f88e315cc 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h +++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h @@ -38,11 +38,6 @@ public: virtual void solve( ConstVectorViewType b, VectorViewType x ) const override; - String getType() const - { - return String( "Diagonal" ); - } - protected: VectorType 
diagonal; }; @@ -67,11 +62,6 @@ public: virtual void solve( ConstVectorViewType b, VectorViewType x ) const override; - String getType() const - { - return String( "Diagonal" ); - } - protected: VectorType diagonal; }; diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h index 97bc854ce..8a177df05 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h @@ -37,13 +37,7 @@ class ILU0_impl template< typename Matrix > class ILU0 : public ILU0_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType > -{ -public: - String getType() const - { - return String( "ILU0" ); - } -}; +{}; template< typename Matrix, typename Real, typename Index > class ILU0_impl< Matrix, Real, Devices::Host, Index > diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h index fa7c814fc..cce3dc5c4 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h @@ -33,11 +33,6 @@ class ILUT : public ILUT_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType > { public: - String getType() const - { - return String( "ILUT" ); - } - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ) { diff --git a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h index 2e70be2b1..67a62e74f 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h +++ b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h @@ -55,11 +55,6 @@ public: throw std::logic_error("The solve() method of a dummy preconditioner should not be called."); } - String getType() const - { - return String( "Preconditioner" ); - } - virtual ~Preconditioner() {} }; diff --git a/src/TNL/Solvers/Linear/SOR.h b/src/TNL/Solvers/Linear/SOR.h index 
7e94634cd..0d9aae433 100644 --- a/src/TNL/Solvers/Linear/SOR.h +++ b/src/TNL/Solvers/Linear/SOR.h @@ -28,8 +28,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/Linear/SOR_impl.h b/src/TNL/Solvers/Linear/SOR_impl.h index 648ae8d41..4a7d4fb9d 100644 --- a/src/TNL/Solvers/Linear/SOR_impl.h +++ b/src/TNL/Solvers/Linear/SOR_impl.h @@ -17,14 +17,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String SOR< Matrix > :: getType() const -{ - return String( "SOR< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void SOR< Matrix >:: diff --git a/src/TNL/Solvers/Linear/TFQMR.h b/src/TNL/Solvers/Linear/TFQMR.h index 73d0894aa..2a94f44e7 100644 --- a/src/TNL/Solvers/Linear/TFQMR.h +++ b/src/TNL/Solvers/Linear/TFQMR.h @@ -28,8 +28,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - bool solve( ConstVectorViewType b, VectorViewType x ) override; protected: diff --git a/src/TNL/Solvers/Linear/TFQMR_impl.h b/src/TNL/Solvers/Linear/TFQMR_impl.h index 590aa35af..0ea03e83c 100644 --- a/src/TNL/Solvers/Linear/TFQMR_impl.h +++ b/src/TNL/Solvers/Linear/TFQMR_impl.h @@ -18,14 +18,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String TFQMR< Matrix > :: getType() const -{ - return String( "TFQMR< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > bool TFQMR< Matrix >::solve( ConstVectorViewType b, VectorViewType x ) { diff --git a/src/TNL/Solvers/Linear/UmfpackWrapper.h b/src/TNL/Solvers/Linear/UmfpackWrapper.h index 1d4e67ea2..0e2e5d7ac 100644 
--- a/src/TNL/Solvers/Linear/UmfpackWrapper.h +++ b/src/TNL/Solvers/Linear/UmfpackWrapper.h @@ -81,8 +81,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - bool solve( ConstVectorViewType b, VectorViewType x ) override; }; diff --git a/src/TNL/Solvers/ODE/Euler.h b/src/TNL/Solvers/ODE/Euler.h index 508d77b6a..1fd6ab3c1 100644 --- a/src/TNL/Solvers/ODE/Euler.h +++ b/src/TNL/Solvers/ODE/Euler.h @@ -35,8 +35,6 @@ class Euler : public ExplicitSolver< Problem, SolverMonitor > Euler(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/ODE/Euler.hpp b/src/TNL/Solvers/ODE/Euler.hpp index 1cf5001ae..9dc6b6570 100644 --- a/src/TNL/Solvers/ODE/Euler.hpp +++ b/src/TNL/Solvers/ODE/Euler.hpp @@ -31,14 +31,6 @@ Euler< Problem, SolverMonitor > :: Euler() { }; -template< typename Problem, typename SolverMonitor > -String Euler< Problem, SolverMonitor > :: getType() -{ - return String( "Euler< " ) + - Problem :: getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Euler< Problem, SolverMonitor > :: configSetup( Config::ConfigDescription& config, const String& prefix ) diff --git a/src/TNL/Solvers/ODE/Merson.h b/src/TNL/Solvers/ODE/Merson.h index 3ac978178..99ffc2409 100644 --- a/src/TNL/Solvers/ODE/Merson.h +++ b/src/TNL/Solvers/ODE/Merson.h @@ -35,8 +35,6 @@ class Merson : public ExplicitSolver< Problem, SolverMonitor > Merson(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h index 3c88576e9..4c7b21bc9 100644 --- a/src/TNL/Solvers/ODE/Merson_impl.h +++ b/src/TNL/Solvers/ODE/Merson_impl.h @@ -40,14 +40,6 @@ Merson< Problem, SolverMonitor >::Merson() } }; -template< typename Problem, 
typename SolverMonitor > -String Merson< Problem, SolverMonitor >::getType() -{ - return String( "Merson< " ) + - Problem::getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Merson< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config, const String& prefix ) diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h index 8a5f0db1e..d4f6992b3 100644 --- a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h +++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h @@ -42,8 +42,6 @@ class ExplicitTimeStepper static_assert( ProblemType::isTimeDependent(), "The problem is not time dependent." ); - static String getType(); - ExplicitTimeStepper(); static void configSetup( Config::ConfigDescription& config, diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h index 4024ff326..fa2d1f806 100644 --- a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h +++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h @@ -14,21 +14,8 @@ namespace TNL { namespace Solvers { -namespace PDE { +namespace PDE { -template< typename Problem, - template < typename OdeProblem, typename SolverMonitor > class OdeSolver > -String -ExplicitTimeStepper< Problem, OdeSolver >:: -getType() -{ - return String( "ExplicitTimeStepper< " ) + - Problem::getType() + ", " + - OdeSolverType::getType() + ", " + - String( " >" ); -}; - - template< typename Problem, template < typename OdeProblem, typename SolverMonitor > class OdeSolver > ExplicitTimeStepper< Problem, OdeSolver >:: @@ -37,7 +24,7 @@ ExplicitTimeStepper() timeStep( 0 ), allIterations( 0 ) { -}; +} template< typename Problem, template < typename OdeProblem, typename SolverMonitor > class OdeSolver > diff --git a/src/TNL/String.h b/src/TNL/String.h index 63924c27f..f35abc377 100644 --- a/src/TNL/String.h +++ b/src/TNL/String.h @@ -97,11 +97,6 @@ class String */ using std::string::operator=; - /** - * \brief Returns type 
of string: \c "String". - */ - static String getType(); - /** * \brief Returns the number of characters in given string. Equivalent to \ref getSize. */ diff --git a/src/TNL/String.hpp b/src/TNL/String.hpp index f9846fa6d..3c38fe6b0 100644 --- a/src/TNL/String.hpp +++ b/src/TNL/String.hpp @@ -19,11 +19,6 @@ namespace TNL { -inline String String::getType() -{ - return String( "String" ); -} - inline int String::getLength() const { return getSize(); diff --git a/src/TNL/TypeInfo.h b/src/TNL/TypeInfo.h new file mode 100644 index 000000000..61377fbb8 --- /dev/null +++ b/src/TNL/TypeInfo.h @@ -0,0 +1,107 @@ +/*************************************************************************** + TypeInfo.h - description + ------------------- + begin : Aug 20, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include + +#if defined( __has_include ) + #if __has_include() + #define TNL_HAS_CXXABI_H + #endif +#elif defined( __GLIBCXX__ ) || defined( __GLIBCPP__ ) + #define TNL_HAS_CXXABI_H +#endif + +#if defined( TNL_HAS_CXXABI_H ) + #include // abi::__cxa_demangle + #include // std::unique_ptr + #include // std::free +#endif + +#include +#include + +namespace TNL { +namespace __getType_impl { + +inline std::string +demangle( const char* name ) +{ +#if defined( TNL_HAS_CXXABI_H ) + int status = 0; + std::size_t size = 0; + std::unique_ptr result( + abi::__cxa_demangle( name, NULL, &size, &status ), + std::free + ); + if( result.get() ) + return result.get(); +#endif + return name; +} + +} // namespace __getType_impl + +/** + * \brief Returns a human-readable string representation of given type. + * + * Note that since we use the \ref typeid operator internally, the top-level + * cv-qualifiers are always ignored. See https://stackoverflow.com/a/8889143 + * for details. 
+ */ +template< typename T > +String getType() +{ + return __getType_impl::demangle( typeid(T).name() ); +} + +/** + * \brief Returns a human-readable string representation of given object's type. + * + * Note that since we use the \ref typeid operator internally, the top-level + * cv-qualifiers are always ignored. See https://stackoverflow.com/a/8889143 + * for details. + */ +template< typename T > +String getType( T&& obj ) +{ + return __getType_impl::demangle( typeid(obj).name() ); +} + +/** + * \brief Returns a string identifying a type for the purpose of serialization. + * + * By default, this function returns the same string as \ref getType. However, + * if a user-defined class has a static \e getSerializationType method, it is + * called instead. This is useful for overriding the default \ref typeid name, + * which may be necessary e.g. for class templates which should have the same + * serialization type for multiple devices. + */ +template< typename T, + std::enable_if_t< ! HasStaticGetSerializationType< T >::value, bool > = true > +String getSerializationType() +{ + return getType< T >(); +} + +/** + * \brief Specialization of \ref getSerializationType for types which provide a + * static \e getSerializationType method to override the default behaviour. + */ +template< typename T, + std::enable_if_t< HasStaticGetSerializationType< T >::value, bool > = true > +String getSerializationType() +{ + return T::getSerializationType(); +} + +} // namespace TNL diff --git a/src/TNL/TypeTraits.h b/src/TNL/TypeTraits.h index d34f7d39f..d617f2b42 100644 --- a/src/TNL/TypeTraits.h +++ b/src/TNL/TypeTraits.h @@ -76,7 +76,6 @@ public: static constexpr bool value = type::value; }; - /** * \brief Type trait for checking if T has operator[] taking one index argument. */ @@ -183,4 +182,31 @@ struct IsViewType std::is_same< typename T::ViewType, T >::value > {}; +/** + * \brief Type trait for checking if T has a static getSerializationType method. 
+ */ +template< typename T > +class HasStaticGetSerializationType +{ +private: + template< typename U > + static constexpr auto check(U*) + -> typename + std::enable_if_t< + ! std::is_same< + decltype( U::getSerializationType() ), + void + >::value, + std::true_type + >; + + template< typename > + static constexpr std::false_type check(...); + + using type = decltype(check(0)); + +public: + static constexpr bool value = type::value; +}; + } //namespace TNL diff --git a/src/TNL/param-types.h b/src/TNL/param-types.h deleted file mode 100644 index 228b74279..000000000 --- a/src/TNL/param-types.h +++ /dev/null @@ -1,91 +0,0 @@ -/*************************************************************************** - param-types.h - description - ------------------- - begin : 2009/07/29 - copyright : (C) 2009 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include - -#include -#include - -namespace TNL { - -namespace __getType_impl { - -template< typename T, - bool isEnum = std::is_enum< T >::value > -struct getTypeHelper -{ - static String get() { return T::getType(); } -}; - -template<> struct getTypeHelper< void, false >{ static String get() { return String( "void" ); }; }; -template<> struct getTypeHelper< bool, false >{ static String get() { return String( "bool" ); }; }; - -template<> struct getTypeHelper< char, false >{ static String get() { return String( "char" ); }; }; -template<> struct getTypeHelper< short int, false >{ static String get() { return String( "short int" ); }; }; -template<> struct getTypeHelper< int, false >{ static String get() { return String( "int" ); }; }; -template<> struct getTypeHelper< long int, false >{ static String get() { return String( "long int" ); }; }; - -template<> struct getTypeHelper< unsigned char, false >{ static String get() { return String( "unsigned char" ); }; }; 
-template<> struct getTypeHelper< unsigned short, false >{ static String get() { return String( "unsigned short" ); }; }; -template<> struct getTypeHelper< unsigned int, false >{ static String get() { return String( "unsigned int" ); }; }; -template<> struct getTypeHelper< unsigned long, false >{ static String get() { return String( "unsigned long" ); }; }; - -template<> struct getTypeHelper< signed char, false >{ static String get() { return String( "signed char" ); }; }; - -template<> struct getTypeHelper< float, false >{ static String get() { return String( "float" ); }; }; -template<> struct getTypeHelper< double, false >{ static String get() { return String( "double" ); }; }; -template<> struct getTypeHelper< long double, false >{ static String get() { return String( "long double" ); }; }; -template<> struct getTypeHelper< tnlFloat, false >{ static String get() { return String( "tnlFloat" ); }; }; -template<> struct getTypeHelper< tnlDouble, false >{ static String get() { return String( "tnlDouble" ); }; }; - -// const specializations -template<> struct getTypeHelper< const void, false >{ static String get() { return String( "const void" ); }; }; -template<> struct getTypeHelper< const bool, false >{ static String get() { return String( "const bool" ); }; }; - -template<> struct getTypeHelper< const char, false >{ static String get() { return String( "const char" ); }; }; -template<> struct getTypeHelper< const short int, false >{ static String get() { return String( "const short int" ); }; }; -template<> struct getTypeHelper< const int, false >{ static String get() { return String( "const int" ); }; }; -template<> struct getTypeHelper< const long int, false >{ static String get() { return String( "const long int" ); }; }; - -template<> struct getTypeHelper< const unsigned char, false >{ static String get() { return String( "const unsigned char" ); }; }; -template<> struct getTypeHelper< const unsigned short, false >{ static String get() { return String( 
"const unsigned short" ); }; }; -template<> struct getTypeHelper< const unsigned int, false >{ static String get() { return String( "const unsigned int" ); }; }; -template<> struct getTypeHelper< const unsigned long, false >{ static String get() { return String( "const unsigned long" ); }; }; - -template<> struct getTypeHelper< const signed char, false >{ static String get() { return String( "const signed char" ); }; }; - -template<> struct getTypeHelper< const float, false >{ static String get() { return String( "const float" ); }; }; -template<> struct getTypeHelper< const double, false >{ static String get() { return String( "const double" ); }; }; -template<> struct getTypeHelper< const long double, false >{ static String get() { return String( "const long double" ); }; }; -template<> struct getTypeHelper< const tnlFloat, false >{ static String get() { return String( "const tnlFloat" ); }; }; -template<> struct getTypeHelper< const tnlDouble, false >{ static String get() { return String( "const tnlDouble" ); }; }; - -template< typename T > -struct getTypeHelper< T, true > -{ - static String get() { return getTypeHelper< typename std::underlying_type< T >::type, false >::get(); }; -}; - -// wrappers for STL containers -template< typename T > -struct getTypeHelper< std::vector< T >, false > -{ - static String get() { return String( "std::vector< " ) + getTypeHelper< T >::get() + " >"; } -}; - -} // namespace __getType_impl - -template< typename T > -String getType() { return __getType_impl::getTypeHelper< T >::get(); } - -} // namespace TNL diff --git a/src/Tools/tnl-lattice-init.h b/src/Tools/tnl-lattice-init.h index 203054f58..71a09636c 100644 --- a/src/Tools/tnl-lattice-init.h +++ b/src/Tools/tnl-lattice-init.h @@ -246,9 +246,9 @@ bool resolveProfileReal( const Config::ParameterContainer& parameters ) std::cerr << "MeshFunction is required in profile file " << profileFile << "." 
<< std::endl; return false; } - if( parsedMeshFunctionType[ 1 ] != ProfileMesh::getType() ) + if( parsedMeshFunctionType[ 1 ] != getType< ProfileMesh >() ) { - std::cerr << "The mesh function in the profile file must be defined on " << ProfileMesh::getType() + std::cerr << "The mesh function in the profile file must be defined on " << getType< ProfileMesh >() << " but it is defined on " << parsedMeshFunctionType[ 1 ] << "." << std::endl; return false; } diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization.h.in index e67c5e007..89146c200 100644 --- a/src/Tools/tnl-quickstart/operator-grid-specialization.h.in +++ b/src/Tools/tnl-quickstart/operator-grid-specialization.h.in @@ -14,8 +14,6 @@ class {operatorName}< TNL::Meshes::Grid< {meshDimension}, MeshReal, Device, Mesh typedef TNL::Functions::MeshFunction< MeshType > MeshFunctionType; enum {{ Dimension = MeshType::getMeshDimension() }}; - static TNL::String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in index da4da6d63..ed00005bc 100644 --- a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in +++ b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in @@ -1,21 +1,6 @@ /**** * {meshDimension}D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -TNL::String -{operatorName}< TNL::Meshes::Grid< {meshDimension}, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{{ - return TNL::String( "{operatorName}< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -}} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/Tools/tnl-quickstart/problem.h.in 
b/src/Tools/tnl-quickstart/problem.h.in index 9006f7cf7..d72120c1f 100644 --- a/src/Tools/tnl-quickstart/problem.h.in +++ b/src/Tools/tnl-quickstart/problem.h.in @@ -38,8 +38,6 @@ class {problemBaseName}Problem: using CommunicatorType = Communicator; - static TNL::String getTypeStatic(); - TNL::String getPrologHeader() const; void writeProlog( TNL::Logger& logger, diff --git a/src/Tools/tnl-quickstart/problem_impl.h.in b/src/Tools/tnl-quickstart/problem_impl.h.in index f196ebcec..3e72e4db1 100644 --- a/src/Tools/tnl-quickstart/problem_impl.h.in +++ b/src/Tools/tnl-quickstart/problem_impl.h.in @@ -7,18 +7,6 @@ #include #include -template< typename Mesh, - typename Communicator, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -TNL::String -{problemBaseName}Problem< Mesh, Communicator, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getTypeStatic() -{{ - return TNL::String( "{problemBaseName}Problem< " ) + Mesh :: getTypeStatic() + " >"; -}} - template< typename Mesh, typename Communicator, typename BoundaryCondition, diff --git a/src/Tools/tnl-view.h b/src/Tools/tnl-view.h index cd7cd93b9..7e7b82bbf 100644 --- a/src/Tools/tnl-view.h +++ b/src/Tools/tnl-view.h @@ -52,7 +52,7 @@ bool writeMeshFunction( const typename MeshFunction::MeshPointer& meshPointer, { MeshFunction function( meshPointer ); - std::cout << "Mesh function: " << function.getType() << std::endl; + std::cout << "Mesh function: " << getType( function ) << std::endl; try { function.load( inputFileName ); @@ -84,7 +84,7 @@ bool writeVectorField( const typename VectorField::FunctionType::MeshPointer& me { VectorField field( meshPointer ); - std::cout << "VectorField: " << field.getType() << std::endl; + std::cout << "VectorField: " << getType( field ) << std::endl; try { field.load( inputFileName ); diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index a9fdeab52..b6f7f383f 100644 --- a/src/UnitTests/CMakeLists.txt +++ 
b/src/UnitTests/CMakeLists.txt @@ -57,6 +57,10 @@ ADD_EXECUTABLE( TimerTest TimerTest.cpp ) TARGET_COMPILE_OPTIONS( TimerTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TimerTest ${GTEST_BOTH_LIBRARIES} ) +ADD_EXECUTABLE( TypeInfoTest TypeInfoTest.cpp ) +TARGET_COMPILE_OPTIONS( TypeInfoTest PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( TypeInfoTest ${GTEST_BOTH_LIBRARIES} ) + ADD_TEST( AssertTest ${EXECUTABLE_OUTPUT_PATH}/AssertTest${CMAKE_EXECUTABLE_SUFFIX} ) if( BUILD_CUDA ) ADD_TEST( AssertCudaTest ${EXECUTABLE_OUTPUT_PATH}/AssertCudaTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -67,3 +71,4 @@ ADD_TEST( StringTest ${EXECUTABLE_OUTPUT_PATH}/StringTest${CMAKE_EXECUTABLE_SUFF ADD_TEST( ObjectTest ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TimerTest ${EXECUTABLE_OUTPUT_PATH}/TimerTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( TypeInfoTest ${EXECUTABLE_OUTPUT_PATH}/TypeInfoTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index 7151ed441..25c7fda49 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -45,11 +45,6 @@ struct MyData // operator used in tests, not necessary for Array to work template< typename T > bool operator==( T v ) const { return data == v; } - - static String getType() - { - return String( "MyData" ); - } }; std::ostream& operator<<( std::ostream& str, const MyData& v ) diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 7f1fb6941..6c8465bd5 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -41,11 +41,6 @@ struct MyData // operator used in tests, not necessary for Array to work template< typename T > bool operator==( T v ) const { return data == v; } - - static String getType() - { - return String( 
"MyData" ); - } }; std::ostream& operator<<( std::ostream& str, const MyData& v ) diff --git a/src/UnitTests/Containers/ListTest.cpp b/src/UnitTests/Containers/ListTest.cpp index f24e650b4..072b75003 100644 --- a/src/UnitTests/Containers/ListTest.cpp +++ b/src/UnitTests/Containers/ListTest.cpp @@ -33,11 +33,6 @@ struct MyData __cuda_callable__ bool operator!=( const MyData& v ) const { return data != v.data; } - - static String getType() - { - return String( "MyData" ); - } }; std::ostream& operator<<( std::ostream& str, const MyData& v ) diff --git a/src/UnitTests/Meshes/BoundaryTagsTest.h b/src/UnitTests/Meshes/BoundaryTagsTest.h index b7eccf5f2..6bc07adab 100644 --- a/src/UnitTests/Meshes/BoundaryTagsTest.h +++ b/src/UnitTests/Meshes/BoundaryTagsTest.h @@ -37,7 +37,8 @@ TEST( MeshTest, RegularMeshOfQuadrilateralsTest ) using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 3 ), ySize( 4 ); const RealType width( 1.0 ), height( 1.0 ); diff --git a/src/UnitTests/Meshes/MeshEntityTest.h b/src/UnitTests/Meshes/MeshEntityTest.h index 5360fd6a8..235150d9d 100644 --- a/src/UnitTests/Meshes/MeshEntityTest.h +++ b/src/UnitTests/Meshes/MeshEntityTest.h @@ -114,7 +114,8 @@ TEST( MeshEntityTest, VertexMeshEntityTest ) using VertexMeshEntityType = TestMeshEntity< TestEdgeMeshConfig, typename EdgeMeshEntityType::SubentityTraits< 0 >::SubentityTopology >; using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); 
VertexMeshEntityType vertexEntity; PointType point; @@ -131,7 +132,8 @@ TEST( MeshEntityTest, EdgeMeshEntityTest ) static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * @@ -211,7 +213,8 @@ TEST( MeshEntityTest, TriangleMeshEntityTest ) static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * We set-up the same situation as in the test above @@ -293,7 +296,8 @@ TEST( MeshEntityTest, TetrahedronMeshEntityTest ) static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 3, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); /**** * We set-up similar situation as above but with @@ -457,7 +461,8 @@ TEST( MeshEntityTest, TwoTrianglesMeshEntityTest ) static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." 
); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * We set-up the following situation @@ -647,7 +652,8 @@ TEST( MeshEntityTest, OneTriangleComparisonTest ) static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); PointType point0( 0.0, 0.0 ), point1( 1.0, 0.0 ), diff --git a/src/UnitTests/Meshes/MeshTest.h b/src/UnitTests/Meshes/MeshTest.h index 352a2d791..5c95221ed 100644 --- a/src/UnitTests/Meshes/MeshTest.h +++ b/src/UnitTests/Meshes/MeshTest.h @@ -180,7 +180,8 @@ TEST( MeshTest, TwoTrianglesTest ) static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." 
); using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * We set-up the following situation @@ -289,7 +290,8 @@ TEST( MeshTest, TetrahedronsTest ) using VertexMeshEntityType = typename TetrahedronMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 3, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); typedef Mesh< TestTetrahedronMeshConfig > TestTetrahedronMesh; TestTetrahedronMesh mesh; @@ -454,7 +456,8 @@ TEST( MeshTest, RegularMeshOfTrianglesTest ) using VertexMeshEntityType = typename TriangleMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 5 ), ySize( 5 ); const RealType width( 1.0 ), height( 1.0 ); @@ -554,7 +557,8 @@ TEST( MeshTest, RegularMeshOfQuadrilateralsTest ) using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 3 ), ySize( 4 ); const RealType width( 1.0 ), height( 1.0 ); @@ -652,7 +656,8 @@ TEST( MeshTest, RegularMeshOfHexahedronsTest ) using VertexMeshEntityType 
= typename HexahedronMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 3, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 3 ), ySize( 4 ), zSize( 5 ); const RealType width( 1.0 ), height( 1.0 ), depth( 1.0 ); diff --git a/src/UnitTests/TypeInfoTest.cpp b/src/UnitTests/TypeInfoTest.cpp new file mode 100644 index 000000000..c47cdffc8 --- /dev/null +++ b/src/UnitTests/TypeInfoTest.cpp @@ -0,0 +1,165 @@ +/*************************************************************************** + TypeInfoTest.cpp - description + ------------------- + begin : Aug 20, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include + +#ifdef HAVE_GTEST +#include +#endif + +using namespace TNL; + +#ifdef HAVE_GTEST + +enum MyEnumType { foo, bar }; +enum class MyEnumClass { foo, bar }; + +class MyClass {}; +class MyClassWithGetSerializationType +{ +public: + static std::string getSerializationType() { return "SomethingElse"; } +}; + +template< typename... 
> +class MyClassTemplate {}; + +class MyPolymorphicBase +{ +public: + virtual ~MyPolymorphicBase() {} +}; +class MyPolymorphicDerived : public MyPolymorphicBase +{ +public: + virtual ~MyPolymorphicDerived() {} +}; + + +TEST( TypeInfoTest, getType ) +{ + // non-const variants + EXPECT_EQ( getType< void >(), std::string( "void" ) ); + EXPECT_EQ( getType< bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getType< char >(), std::string( "char" ) ); + EXPECT_EQ( getType< short >(), std::string( "short" ) ); + EXPECT_EQ( getType< int >(), std::string( "int" ) ); + EXPECT_EQ( getType< long >(), std::string( "long" ) ); + + EXPECT_EQ( getType< unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getType< unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getType< unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getType< unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getType< signed char >(), std::string( "signed char" ) ); + + EXPECT_EQ( getType< float >(), std::string( "float" ) ); + EXPECT_EQ( getType< double >(), std::string( "double" ) ); + EXPECT_EQ( getType< long double >(), std::string( "long double" ) ); + + // const variants - top-level cv-qualifiers are ignored + EXPECT_EQ( getType< const void >(), std::string( "void" ) ); + EXPECT_EQ( getType< const bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getType< const char >(), std::string( "char" ) ); + EXPECT_EQ( getType< const short >(), std::string( "short" ) ); + EXPECT_EQ( getType< const int >(), std::string( "int" ) ); + EXPECT_EQ( getType< const long >(), std::string( "long" ) ); + + EXPECT_EQ( getType< const unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getType< const unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getType< const unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getType< const unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getType< const signed char 
>(), std::string( "signed char" ) ); + + EXPECT_EQ( getType< const float >(), std::string( "float" ) ); + EXPECT_EQ( getType< const double >(), std::string( "double" ) ); + EXPECT_EQ( getType< const long double >(), std::string( "long double" ) ); + + // enum types + EXPECT_EQ( getType< MyEnumType >(), std::string( "MyEnumType" ) ); + EXPECT_EQ( getType< MyEnumClass >(), std::string( "MyEnumClass" ) ); + + // classes + EXPECT_EQ( getType< MyClass >(), std::string( "MyClass" ) ); + EXPECT_EQ( getType< MyClassWithGetSerializationType >(), std::string( "MyClassWithGetSerializationType" ) ); + + // class templates + using T1 = MyClassTemplate< int, MyClassTemplate< int, int >, MyClass >; + EXPECT_EQ( getType< T1 >(), std::string( "MyClassTemplate, MyClass>" ) ); + + // polymorphic base + MyPolymorphicDerived obj; + MyPolymorphicBase* ptr = &obj; + // no dynamic cast for pointer types + EXPECT_EQ( getType( ptr ), std::string( "MyPolymorphicBase*" ) ); + // reference to a polymorphic object gets dynamic cast + EXPECT_EQ( getType( *ptr ), std::string( "MyPolymorphicDerived" ) ); +} + +TEST( TypeInfoTest, getSerializationType ) +{ + // non-const variants + EXPECT_EQ( getSerializationType< void >(), std::string( "void" ) ); + EXPECT_EQ( getSerializationType< bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getSerializationType< char >(), std::string( "char" ) ); + EXPECT_EQ( getSerializationType< short >(), std::string( "short" ) ); + EXPECT_EQ( getSerializationType< int >(), std::string( "int" ) ); + EXPECT_EQ( getSerializationType< long >(), std::string( "long" ) ); + + EXPECT_EQ( getSerializationType< unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getSerializationType< unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getSerializationType< unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getSerializationType< unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getSerializationType< signed char >(), 
std::string( "signed char" ) ); + + EXPECT_EQ( getSerializationType< float >(), std::string( "float" ) ); + EXPECT_EQ( getSerializationType< double >(), std::string( "double" ) ); + EXPECT_EQ( getSerializationType< long double >(), std::string( "long double" ) ); + + // const variants - top-level cv-qualifiers are ignored + EXPECT_EQ( getSerializationType< const void >(), std::string( "void" ) ); + EXPECT_EQ( getSerializationType< const bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getSerializationType< const char >(), std::string( "char" ) ); + EXPECT_EQ( getSerializationType< const short >(), std::string( "short" ) ); + EXPECT_EQ( getSerializationType< const int >(), std::string( "int" ) ); + EXPECT_EQ( getSerializationType< const long >(), std::string( "long" ) ); + + EXPECT_EQ( getSerializationType< const unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getSerializationType< const unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getSerializationType< const unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getSerializationType< const unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getSerializationType< const signed char >(), std::string( "signed char" ) ); + + EXPECT_EQ( getSerializationType< const float >(), std::string( "float" ) ); + EXPECT_EQ( getSerializationType< const double >(), std::string( "double" ) ); + EXPECT_EQ( getSerializationType< const long double >(), std::string( "long double" ) ); + + // enum types + EXPECT_EQ( getSerializationType< MyEnumType >(), std::string( "MyEnumType" ) ); + EXPECT_EQ( getSerializationType< MyEnumClass >(), std::string( "MyEnumClass" ) ); + + // classes + EXPECT_EQ( getSerializationType< MyClass >(), std::string( "MyClass" ) ); + EXPECT_EQ( getSerializationType< MyClassWithGetSerializationType >(), std::string( "SomethingElse" ) ); + + // class templates + using T1 = MyClassTemplate< int, MyClassTemplate< int, int >, MyClass >; + EXPECT_EQ( 
getSerializationType< T1 >(), "MyClassTemplate, MyClass>" ); +} +#endif + +#include "main.h" -- GitLab From 39dadccb76366f194dd7d42ea73aaa44446d10fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 22 Aug 2019 12:57:25 +0200 Subject: [PATCH 07/35] Fixed parseCommandLine after refactoring the getType function --- src/TNL/Config/ConfigDescription.h | 26 ++++++++++++++----------- src/TNL/Config/ParameterContainer.h | 4 ++-- src/TNL/Config/parseCommandLine.h | 30 +++++++++++++++++++---------- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/TNL/Config/ConfigDescription.h b/src/TNL/Config/ConfigDescription.h index febad283c..648db1d44 100644 --- a/src/TNL/Config/ConfigDescription.h +++ b/src/TNL/Config/ConfigDescription.h @@ -142,7 +142,7 @@ public: TNL_ASSERT_TRUE( this->currentEntry, "there is no current entry" ); if( isCurrentEntryList ) { ConfigEntryList< EntryType >& entry = dynamic_cast< ConfigEntryList< EntryType >& >( *currentEntry ); - entry.getEnumValues().push_back( entryEnum ); + entry.getEnumValues().push_back( entryEnum ); } else { ConfigEntry< EntryType >& entry = dynamic_cast< ConfigEntry< EntryType >& >( *currentEntry ); @@ -216,7 +216,7 @@ public: std::cerr << "Asking for the default value of unknown parameter." << std::endl; return nullptr; } - + //! Returns zero pointer if there is no default value template< class T > T* getDefaultValue( const String& name ) @@ -254,55 +254,59 @@ public: if( entries[ i ]->hasDefaultValue && ! 
parameter_container.checkParameter( entry_name ) ) { - if( entries[ i ]->getEntryType() == "String" ) + if( entries[ i ]->getEntryType() == "TNL::String" ) { ConfigEntry< String >& entry = dynamic_cast< ConfigEntry< String >& >( *entries[ i ] ); parameter_container.addParameter< String >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "bool" ) + else if( entries[ i ]->getEntryType() == "bool" ) { ConfigEntry< bool >& entry = dynamic_cast< ConfigEntry< bool >& >( *entries[ i ] ); parameter_container.addParameter< bool >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "int" ) + else if( entries[ i ]->getEntryType() == "int" ) { ConfigEntry< int >& entry = dynamic_cast< ConfigEntry< int >& >( *entries[ i ] ); parameter_container.addParameter< int >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "double" ) + else if( entries[ i ]->getEntryType() == "double" ) { ConfigEntry< double >& entry = dynamic_cast< ConfigEntry< double >& >( *entries[ i ] ); parameter_container.addParameter< double >( entry_name, entry.defaultValue ); continue; } - - if( entries[ i ]->getEntryType() == "ConfigEntryList< String >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< TNL::String >" ) { ConfigEntryList< String >& entry = dynamic_cast< ConfigEntryList< String >& >( *entries[ i ] ); parameter_container.addList< String >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "ConfigEntryList< bool >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< bool >" ) { ConfigEntryList< bool >& entry = dynamic_cast< ConfigEntryList< bool >& >( *entries[ i ] ); parameter_container.addList< bool >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "ConfigEntryList< int >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< int >" ) { ConfigEntryList< int >& entry = dynamic_cast< 
ConfigEntryList< int >& >( *entries[ i ] ); parameter_container.addList< int >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "ConfigEntryList< double >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< double >" ) { ConfigEntryList< double >& entry = dynamic_cast< ConfigEntryList< double >& >( *entries[ i ] ); parameter_container.addList< double >( entry_name, entry.defaultValue ); continue; } + else + { + throw std::runtime_error( "Method ConfigDescription::addMissingEntries encountered " + "unsupported entry type: " + entries[ i ]->getEntryType() ); + } } } } diff --git a/src/TNL/Config/ParameterContainer.h b/src/TNL/Config/ParameterContainer.h index cceaf2afb..734db27f5 100644 --- a/src/TNL/Config/ParameterContainer.h +++ b/src/TNL/Config/ParameterContainer.h @@ -63,7 +63,7 @@ public: parameters.push_back( std::make_unique< Parameter< T > >( name, TNL::getType< T >(), value ) ); return true; } - + /** * \brief Adds new parameter to the ParameterContainer. * @@ -73,7 +73,7 @@ public: */ template< class T > bool addList( const String& name, - const T& value ) + const T& value ) { std::vector< T > v; v.push_back( value ); diff --git a/src/TNL/Config/parseCommandLine.h b/src/TNL/Config/parseCommandLine.h index 3e2849290..8993de027 100644 --- a/src/TNL/Config/parseCommandLine.h +++ b/src/TNL/Config/parseCommandLine.h @@ -50,7 +50,7 @@ parseCommandLine( int argc, char* argv[], int i; bool parse_error( false ); - for( i = 1; i < argc; i ++ ) + for( i = 1; i < argc; i++ ) { const char* _option = argv[ i ]; if( _option[ 0 ] != '-' ) @@ -74,7 +74,7 @@ parseCommandLine( int argc, char* argv[], else { const String& entryType = entry->getEntryType(); - const char* value = argv[ ++ i ]; + const char* value = argv[ ++i ]; if( ! value ) { std::cerr << "Missing value for the parameter " << option << "." 
<< std::endl; @@ -96,11 +96,11 @@ parseCommandLine( int argc, char* argv[], while( i < argc && ( ( argv[ i ] )[ 0 ] != '-' || ( atof( argv[ i ] ) < 0.0 && ( parsedEntryType[ 1 ] == "int" || parsedEntryType[ 1 ] == "double" ) ) ) ) { const char* value = argv[ i ++ ]; - if( parsedEntryType[ 1 ] == "String" ) + if( parsedEntryType[ 1 ] == "TNL::String" ) { string_list.push_back( String( value ) ); } - if( parsedEntryType[ 1 ] == "bool" ) + else if( parsedEntryType[ 1 ] == "bool" ) { const int v = matob( value ); if( v == -1 ) @@ -110,14 +110,19 @@ parseCommandLine( int argc, char* argv[], } else bool_list.push_back( v ); } - if( parsedEntryType[ 1 ] == "int" ) + else if( parsedEntryType[ 1 ] == "int" ) { integer_list.push_back( atoi( value ) ); } - if( parsedEntryType[ 1 ] == "double" ) + else if( parsedEntryType[ 1 ] == "double" ) { real_list.push_back( atof( value ) ); } + else + { + // this will not happen if all entry types are handled above + throw std::runtime_error( "Function parseCommandLine encountered unsupported entry type: " + entryType ); + } } if( string_list.size() ) parameters.addParameter< std::vector< String > >( option, string_list ); @@ -132,14 +137,14 @@ parseCommandLine( int argc, char* argv[], } else { - if( parsedEntryType[ 0 ] == "String" ) + if( parsedEntryType[ 0 ] == "TNL::String" ) { if( ! ( ( ConfigEntry< String >* ) entry )->checkValue( value ) ) return false; parameters.addParameter< String >( option, value ); continue; } - if( parsedEntryType[ 0 ] == "bool" ) + else if( parsedEntryType[ 0 ] == "bool" ) { const int v = matob( value ); if( v == -1 ) @@ -150,7 +155,7 @@ parseCommandLine( int argc, char* argv[], else parameters.addParameter< bool >( option, v ); continue; } - if( parsedEntryType[ 0 ] == "int" ) + else if( parsedEntryType[ 0 ] == "int" ) { /*if( ! 
std::isdigit( value ) ) //TODO: Check for real number { @@ -162,7 +167,7 @@ parseCommandLine( int argc, char* argv[], return false; parameters.addParameter< int >( option, atoi( value ) ); } - if( parsedEntryType[ 0 ] == "double" ) + else if( parsedEntryType[ 0 ] == "double" ) { /*if( ! std::isdigit( value ) ) //TODO: Check for real number { @@ -174,6 +179,11 @@ parseCommandLine( int argc, char* argv[], return false; parameters.addParameter< double >( option, atof( value ) ); } + else + { + // this will not happen if all entry types are handled above + throw std::runtime_error( "Function parseCommandLine encountered unsupported entry type: " + entryType ); + } } } } -- GitLab From fed5d45ca61dc013d570845412096454120428a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 21 Aug 2019 12:49:36 +0200 Subject: [PATCH 08/35] Added default stream synchronizations after kernel launches in CudaReductionKernel.h --- src/TNL/Containers/Algorithms/CudaReductionKernel.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 82b030e1a..36bd5c88b 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -473,6 +473,7 @@ struct CudaReductionKernelLauncher default: TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); } + cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; */ @@ -482,6 +483,8 @@ struct CudaReductionKernelLauncher CudaReductionKernel< Reduction_maxThreadsPerBlock > <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + cudaStreamSynchronize(0); + TNL_CHECK_CUDA_DEVICE; } else { TNL_ASSERT( false, std::cerr << "Block size was expected to be " << Reduction_maxThreadsPerBlock << ", but " << blockSize.x << " was specified." 
<< std::endl; ); @@ -578,6 +581,7 @@ struct CudaReductionKernelLauncher default: TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); } + cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; */ @@ -587,6 +591,8 @@ struct CudaReductionKernelLauncher CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock > <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + cudaStreamSynchronize(0); + TNL_CHECK_CUDA_DEVICE; } else { TNL_ASSERT( false, std::cerr << "Block size was expected to be " << Reduction_maxThreadsPerBlock << ", but " << blockSize.x << " was specified." << std::endl; ); -- GitLab From 2d5176fbd499dc84110e7105ed3fec69caa0e869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 21 Aug 2019 16:30:14 +0200 Subject: [PATCH 09/35] Moved (most of) static methods from TNL::Devices::Cuda as free functions into separate namespace TNL::Cuda The class TNL::Devices::Cuda was too bloated, breaking the Single Responsibility Principle. It should be used only for template specializations and other things common to all devices. The functions in MemoryHelpers.h are deprecated, smart pointers should be used instead. The functions in LaunchHelpers.h are temporary, more refactoring is needed with respect to execution policies and custom launch parameters. 
--- src/Benchmarks/BLAS/spmv.h | 8 +- src/Benchmarks/Benchmarks.h | 20 +- .../HeatEquationBenchmarkProblem_impl.h | 30 +- .../HeatEquation/Tuning/GridTraverser.h | 2 +- .../HeatEquation/Tuning/GridTraverser_impl.h | 22 +- src/Benchmarks/ODESolvers/Euler.hpp | 10 +- src/Benchmarks/ODESolvers/Merson.hpp | 24 +- .../large-meshfunction-example.h | 5 +- src/TNL/Allocators/Cuda.h | 4 +- src/TNL/Allocators/CudaHost.h | 4 +- src/TNL/Allocators/CudaManaged.h | 4 +- src/TNL/Assert.h | 2 +- src/TNL/Communicators/MpiCommunicator.h | 2 +- .../Algorithms/CudaMultireductionKernel.h | 17 +- .../Algorithms/CudaReductionBuffer.h | 2 +- .../Algorithms/CudaReductionKernel.h | 19 +- .../Containers/Algorithms/CudaScanKernel.h | 54 ++-- .../Multimaps/EllpackIndexMultimapValues.h | 2 +- .../StaticEllpackIndexMultimapValues.h | 2 +- src/TNL/Containers/ndarray/SizesHolder.h | 2 +- src/TNL/Cuda/CheckDevice.h | 40 +++ src/TNL/{Devices => Cuda}/CudaCallable.h | 3 - src/TNL/Cuda/DeviceInfo.h | 52 ++++ .../DeviceInfo.hpp} | 52 ++-- src/TNL/Cuda/LaunchHelpers.h | 162 +++++++++++ src/TNL/Cuda/MemoryHelpers.h | 103 +++++++ .../SharedMemory.h} | 38 ++- .../{CudaStreamPool.h => Cuda/StreamPool.h} | 23 +- src/TNL/Devices/Cuda.h | 137 +-------- src/TNL/Devices/CudaDeviceInfo.h | 56 ---- src/TNL/Devices/Cuda_impl.h | 268 +----------------- .../tnlDirectEikonalMethodBase1D_impl.h | 2 +- .../tnlDirectEikonalMethodBase2D_impl.h | 4 +- .../tnlDirectEikonalMethodBase3D_impl.h | 6 +- .../tnlFastSweepingMethod1D_impl.h | 2 +- .../tnlFastSweepingMethod2D_impl.h | 4 +- .../tnlFastSweepingMethod3D_impl.h | 6 +- src/TNL/File.hpp | 1 + src/TNL/Functions/FunctionAdapter.h | 2 +- src/TNL/Functions/OperatorFunction.h | 2 +- src/TNL/Functions/TestFunction_impl.h | 12 +- src/TNL/Logger_impl.h | 20 +- src/TNL/Math.h | 2 +- src/TNL/Matrices/AdEllpack_impl.h | 86 +++--- src/TNL/Matrices/BiEllpackSymmetric_impl.h | 54 ++-- src/TNL/Matrices/BiEllpack_impl.h | 54 ++-- src/TNL/Matrices/CSR_impl.h | 34 +-- 
src/TNL/Matrices/ChunkedEllpack_impl.h | 22 +- src/TNL/Matrices/Dense_impl.h | 50 ++-- src/TNL/Matrices/EllpackSymmetricGraph_impl.h | 22 +- src/TNL/Matrices/EllpackSymmetric_impl.h | 22 +- src/TNL/Matrices/Ellpack_impl.h | 24 +- src/TNL/Matrices/MatrixOperations.h | 14 +- src/TNL/Matrices/Matrix_impl.h | 23 +- .../SlicedEllpackSymmetricGraph_impl.h | 32 +-- .../Matrices/SlicedEllpackSymmetric_impl.h | 32 +-- src/TNL/Matrices/SlicedEllpack_impl.h | 32 +-- src/TNL/Matrices/SparseOperations_impl.h | 4 +- src/TNL/Matrices/SparseRow.h | 2 +- src/TNL/Matrices/Tridiagonal_impl.h | 16 +- src/TNL/Meshes/Geometry/getEntityCenter.h | 2 +- src/TNL/Meshes/Geometry/getEntityMeasure.h | 2 +- src/TNL/Meshes/GridDetails/GridTraverser.h | 1 - .../Meshes/GridDetails/GridTraverser_1D.hpp | 14 +- .../Meshes/GridDetails/GridTraverser_2D.hpp | 44 +-- .../Meshes/GridDetails/GridTraverser_3D.hpp | 46 +-- .../GridDetails/NeighborGridEntitiesStorage.h | 2 +- .../GridDetails/NeighborGridEntityGetter.h | 2 +- src/TNL/Meshes/MeshDetails/MeshEntityIndex.h | 2 +- src/TNL/Meshes/MeshDetails/Traverser_impl.h | 14 +- src/TNL/Object.h | 1 - src/TNL/ParallelFor.h | 20 +- src/TNL/Pointers/DevicePointer.h | 5 +- src/TNL/Pointers/SharedPointerCuda.h | 5 +- src/TNL/Pointers/SharedPointerHost.h | 2 +- src/TNL/Pointers/UniquePointer.h | 5 +- .../Solvers/PDE/BoundaryConditionsSetter.h | 2 +- src/TNL/StaticFor.h | 2 +- src/TNL/TemplateStaticFor.h | 2 +- src/UnitTests/AssertCudaTest.cu | 2 +- src/UnitTests/Containers/ArrayTest.h | 7 +- 81 files changed, 940 insertions(+), 997 deletions(-) create mode 100644 src/TNL/Cuda/CheckDevice.h rename src/TNL/{Devices => Cuda}/CudaCallable.h (88%) create mode 100644 src/TNL/Cuda/DeviceInfo.h rename src/TNL/{Devices/CudaDeviceInfo_impl.h => Cuda/DeviceInfo.hpp} (86%) create mode 100644 src/TNL/Cuda/LaunchHelpers.h create mode 100644 src/TNL/Cuda/MemoryHelpers.h rename src/TNL/{CudaSharedMemory.h => Cuda/SharedMemory.h} (78%) rename src/TNL/{CudaStreamPool.h => 
Cuda/StreamPool.h} (73%) delete mode 100644 src/TNL/Devices/CudaDeviceInfo.h diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h index 9fe469509..d515d52d7 100644 --- a/src/Benchmarks/BLAS/spmv.h +++ b/src/Benchmarks/BLAS/spmv.h @@ -53,7 +53,7 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix, const int elementsPerRow, const int gridIdx ) { - const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= matrix->getRows() ) return; int col = rowIdx - elementsPerRow / 2; @@ -73,12 +73,12 @@ void setCudaTestMatrix( Matrix& matrix, typedef typename Matrix::IndexType IndexType; typedef typename Matrix::RealType RealType; Pointers::DevicePointer< Matrix > kernel_matrix( matrix ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); setCudaTestMatrixKernel< Matrix > <<< cudaGridSize, cudaBlockSize >>> ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx ); diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h index 683a18376..3822fef28 100644 --- a/src/Benchmarks/Benchmarks.h +++ b/src/Benchmarks/Benchmarks.h @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include @@ -339,9 +339,9 @@ Benchmark::MetadataMap getHardwareMetadata() + convertToString( cacheSizes.L2 ) + ", " + 
convertToString( cacheSizes.L3 ); #ifdef HAVE_CUDA - const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); - const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + - convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); + const int activeGPU = Cuda::DeviceInfo::getActiveDevice(); + const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + + convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, @@ -362,13 +362,13 @@ Benchmark::MetadataMap getHardwareMetadata() { "CPU max frequency (MHz)", convertToString( Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA - { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, + { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, - { "GPU CUDA cores", convertToString( Devices::CudaDeviceInfo::getCudaCores( activeGPU ) ) }, - { "GPU clock rate (MHz)", convertToString( (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, - { "GPU global memory (GB)", convertToString( (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, - { "GPU memory clock rate (MHz)", convertToString( (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, - { "GPU memory ECC enabled", convertToString( Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) ) }, + { "GPU CUDA cores", convertToString( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) }, + { "GPU clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, + { "GPU global memory (GB)", convertToString( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, + { "GPU memory clock rate (MHz)", convertToString( 
(double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, + { "GPU memory ECC enabled", convertToString( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) }, #endif }; diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h index 53cd0ec36..e3f472923 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h @@ -82,9 +82,9 @@ setup( const Config::ParameterContainer& parameters, if( std::is_same< DeviceType, Devices::Cuda >::value ) { - this->cudaBoundaryConditions = Devices::Cuda::passToDevice( *this->boundaryConditionPointer ); - this->cudaRightHandSide = Devices::Cuda::passToDevice( *this->rightHandSidePointer ); - this->cudaDifferentialOperator = Devices::Cuda::passToDevice( *this->differentialOperatorPointer ); + this->cudaBoundaryConditions = Cuda::passToDevice( *this->boundaryConditionPointer ); + this->cudaRightHandSide = Cuda::passToDevice( *this->rightHandSidePointer ); + this->cudaDifferentialOperator = Cuda::passToDevice( *this->differentialOperatorPointer ); } this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer ); this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer ); @@ -266,8 +266,8 @@ boundaryConditionsTemplatedCompact( const GridType* grid, { typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - coordinates.y() = begin.y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; + coordinates.x() = begin.x() + ( gridXIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + coordinates.y() = begin.y() + ( gridYIdx * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( coordinates.x() < end.x() && coordinates.y() < end.y() ) @@ -357,8 
+357,8 @@ heatEquationTemplatedCompact( const GridType* grid, typedef typename GridType::IndexType IndexType; typedef typename GridType::RealType RealType; - coordinates.x() = begin.x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - coordinates.y() = begin.y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; + coordinates.x() = begin.x() + ( gridXIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + coordinates.y() = begin.y() + ( gridYIdx * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; MeshFunction& u = *_u; MeshFunction& fu = *_fu; @@ -483,10 +483,10 @@ getExplicitUpdate( const RealType& time, CellType cell( mesh.template getData< DeviceType >() ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); - const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y ); + cudaBlocks.x = Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); + cudaBlocks.y = Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y ); + const IndexType cudaXGrids = Cuda::getNumberOfGrids( cudaBlocks.x ); + const IndexType cudaYGrids = Cuda::getNumberOfGrids( cudaBlocks.y ); //std::cerr << "Setting boundary conditions..." 
<< std::endl; @@ -762,10 +762,10 @@ template< typename Mesh, HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >:: ~HeatEquationBenchmarkProblem() { - if( this->cudaMesh ) Devices::Cuda::freeFromDevice( this->cudaMesh ); - if( this->cudaBoundaryConditions ) Devices::Cuda::freeFromDevice( this->cudaBoundaryConditions ); - if( this->cudaRightHandSide ) Devices::Cuda::freeFromDevice( this->cudaRightHandSide ); - if( this->cudaDifferentialOperator ) Devices::Cuda::freeFromDevice( this->cudaDifferentialOperator ); + if( this->cudaMesh ) Cuda::freeFromDevice( this->cudaMesh ); + if( this->cudaBoundaryConditions ) Cuda::freeFromDevice( this->cudaBoundaryConditions ); + if( this->cudaRightHandSide ) Cuda::freeFromDevice( this->cudaRightHandSide ); + if( this->cudaDifferentialOperator ) Cuda::freeFromDevice( this->cudaDifferentialOperator ); } diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h index cdbc4922c..7e7e53691 100644 --- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h +++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h index 816ee5e2c..2a77f8bb5 100644 --- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h +++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h @@ -126,8 +126,8 @@ _GridTraverser2D( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates <= end 
) { @@ -173,7 +173,7 @@ _GridTraverser2DBoundary( Index entitiesAlongX = endX - beginX + 1; Index entitiesAlongY = endY - beginY; - Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + Index threadId = Cuda::getGlobalThreadIdx_x( gridIdx ); if( threadId < entitiesAlongX ) { GridEntity entity( *grid, @@ -244,12 +244,12 @@ processEntities( dim3 cudaBlockSize( 256 ); dim3 cudaBlocksCount, cudaGridsCount; IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); dim3 gridIdx, cudaGridSize; Devices::Cuda::synchronizeDevice(); for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); _GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -266,11 +266,11 @@ processEntities( { dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocksCount, cudaGridsCount; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, - end.x() - begin.x() + 1, - end.y() - begin.y() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, + end.x() - begin.x() + 1, + end.y() - begin.y() + 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); Devices::Cuda::synchronizeDevice(); @@ -278,8 +278,8 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); TNL::_GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), diff --git a/src/Benchmarks/ODESolvers/Euler.hpp b/src/Benchmarks/ODESolvers/Euler.hpp index 22c013041..ab975ed07 100644 --- a/src/Benchmarks/ODESolvers/Euler.hpp +++ b/src/Benchmarks/ODESolvers/Euler.hpp @@ -176,10 +176,10 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u, { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; localResidue = 0.0; for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) @@ -187,7 +187,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u, const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType ); const IndexType gridOffset = gridIdx * threadsPerGrid; const IndexType currentSize = min( size - gridOffset, threadsPerGrid ); - const IndexType currentGridSize = Devices::Cuda::getNumberOfBlocks( currentSize, cudaBlockSize.x ); + const IndexType currentGridSize = Cuda::getNumberOfBlocks( currentSize, cudaBlockSize.x ); updateUEuler<<< currentGridSize, cudaBlockSize, sharedMemory >>>( currentSize, tau, diff --git a/src/Benchmarks/ODESolvers/Merson.hpp b/src/Benchmarks/ODESolvers/Merson.hpp index 1ea606c4c..3c74bdf48 100644 --- a/src/Benchmarks/ODESolvers/Merson.hpp +++ b/src/Benchmarks/ODESolvers/Merson.hpp @@ -290,10 +290,10 @@ void Merson< 
Problem, SolverMonitor >::computeKFunctions( DofVectorPointer& u, { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; this->problem->getExplicitUpdate( time, tau, u, k1 ); cudaDeviceSynchronize(); @@ -384,10 +384,10 @@ typename Problem :: RealType Merson< Problem, SolverMonitor >::computeError( con { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) { @@ -439,10 +439,10 @@ void Merson< Problem, SolverMonitor >::computeNewTimeLevel( const RealType time, { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( 
cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; localResidue = 0.0; for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) diff --git a/src/Examples/simple-examples/large-meshfunction-example.h b/src/Examples/simple-examples/large-meshfunction-example.h index 2f9c70b85..d5520b69e 100644 --- a/src/Examples/simple-examples/large-meshfunction-example.h +++ b/src/Examples/simple-examples/large-meshfunction-example.h @@ -10,7 +10,6 @@ using namespace TNL; using namespace TNL::Containers; using namespace TNL::Meshes; using namespace TNL::Functions; -using namespace TNL::Devices; int main(int argc, char ** argv) { @@ -28,9 +27,9 @@ int main(int argc, char ** argv) time.start(); #ifdef HAVE_CUDA - using Device=Cuda; + using Device=Devices::Cuda; #else - using Device=Host; + using Device=Devices::Host; #endif using MeshType= Grid<2, double,Device,int>; diff --git a/src/TNL/Allocators/Cuda.h b/src/TNL/Allocators/Cuda.h index 74ebb8404..1b648f1ce 100644 --- a/src/TNL/Allocators/Cuda.h +++ b/src/TNL/Allocators/Cuda.h @@ -12,7 +12,9 @@ #pragma once -#include +#include +#include +#include namespace TNL { namespace Allocators { diff --git a/src/TNL/Allocators/CudaHost.h b/src/TNL/Allocators/CudaHost.h index 284c91fe9..9047e0b9a 100644 --- a/src/TNL/Allocators/CudaHost.h +++ b/src/TNL/Allocators/CudaHost.h @@ -12,7 +12,9 @@ #pragma once -#include +#include +#include +#include namespace TNL { namespace Allocators { diff --git a/src/TNL/Allocators/CudaManaged.h b/src/TNL/Allocators/CudaManaged.h index db29f86cb..bb878ca66 100644 --- 
a/src/TNL/Allocators/CudaManaged.h +++ b/src/TNL/Allocators/CudaManaged.h @@ -12,7 +12,9 @@ #pragma once -#include +#include +#include +#include namespace TNL { namespace Allocators { diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h index 3d91c8c76..df8629562 100644 --- a/src/TNL/Assert.h +++ b/src/TNL/Assert.h @@ -120,7 +120,7 @@ #include #include -#include +#include #include namespace TNL { diff --git a/src/TNL/Communicators/MpiCommunicator.h b/src/TNL/Communicators/MpiCommunicator.h index 926fa329a..0aa14a9ec 100644 --- a/src/TNL/Communicators/MpiCommunicator.h +++ b/src/TNL/Communicators/MpiCommunicator.h @@ -24,7 +24,7 @@ #include // getpid #ifdef HAVE_CUDA - #include + #include typedef struct __attribute__((__packed__)) { char name[MPI_MAX_PROCESSOR_NAME]; diff --git a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h b/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h index e67c11b41..c97e0a8aa 100644 --- a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h @@ -14,7 +14,8 @@ #include #include -#include +#include +#include #include #include @@ -52,7 +53,7 @@ CudaMultireductionKernel( const Result zero, const int n, Result* output ) { - Result* sdata = Devices::Cuda::getSharedMemory< Result >(); + Result* sdata = Cuda::getSharedMemory< Result >(); // Get the thread id (tid), global thread id (gid) and gridSize. const Index tid = threadIdx.y * blockDim.x + threadIdx.x; @@ -160,10 +161,10 @@ CudaMultireductionKernelLauncher( const Result zero, // where blocksPerMultiprocessor is determined according to the number of // available registers on the multiprocessor. // On Tesla K40c, desGridSize = 8 * 15 = 120. 
- const int activeDevice = Devices::CudaDeviceInfo::getActiveDevice(); - const int blocksdPerMultiprocessor = Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice ) + const int activeDevice = Cuda::DeviceInfo::getActiveDevice(); + const int blocksdPerMultiprocessor = Cuda::DeviceInfo::getRegistersPerMultiprocessor( activeDevice ) / ( Multireduction_maxThreadsPerBlock * Multireduction_registersPerThread ); - const int desGridSizeX = blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ); + const int desGridSizeX = blocksdPerMultiprocessor * Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ); dim3 blockSize, gridSize; // version A: max 16 rows of threads @@ -189,10 +190,10 @@ CudaMultireductionKernelLauncher( const Result zero, while( blockSize.x * blockSize.y > Multireduction_maxThreadsPerBlock ) blockSize.x /= 2; - gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX ); - gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y ); + gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX ); + gridSize.y = Cuda::getNumberOfBlocks( n, blockSize.y ); - if( gridSize.y > (unsigned) Devices::Cuda::getMaxGridSize() ) { + if( gridSize.y > (unsigned) Cuda::getMaxGridSize() ) { std::cerr << "Maximum gridSize.y limit exceeded (limit is 65535, attempted " << gridSize.y << ")." 
<< std::endl; throw 1; } diff --git a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h b/src/TNL/Containers/Algorithms/CudaReductionBuffer.h index 2897c7280..f873d7815 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h +++ b/src/TNL/Containers/Algorithms/CudaReductionBuffer.h @@ -14,7 +14,7 @@ #include -#include +#include #include #include diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 36bd5c88b..3e948a906 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -14,7 +14,8 @@ #include #include -#include +#include +#include #include #include #include @@ -52,7 +53,7 @@ CudaReductionKernel( const Result zero, const Index size, Result* output ) { - Result* sdata = Devices::Cuda::getSharedMemory< Result >(); + Result* sdata = Cuda::getSharedMemory< Result >(); // Get the thread id (tid), global thread id (gid) and gridSize. const Index tid = threadIdx.x; @@ -147,7 +148,7 @@ CudaReductionWithArgumentKernel( const Result zero, Index* idxOutput, const Index* idxInput = nullptr ) { - Result* sdata = Devices::Cuda::getSharedMemory< Result >(); + Result* sdata = Cuda::getSharedMemory< Result >(); Index* sidx = reinterpret_cast< Index* >( &sdata[ blockDim.x ] ); // Get the thread id (tid), global thread id (gid) and gridSize. @@ -282,11 +283,11 @@ struct CudaReductionKernelLauncher // It seems to be better to map only one CUDA block per one multiprocessor or maybe // just slightly more. Therefore we omit blocksdPerMultiprocessor in the following. 
CudaReductionKernelLauncher( const Index size ) - : activeDevice( Devices::CudaDeviceInfo::getActiveDevice() ), - blocksdPerMultiprocessor( Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice ) + : activeDevice( Cuda::DeviceInfo::getActiveDevice() ), + blocksdPerMultiprocessor( Cuda::DeviceInfo::getRegistersPerMultiprocessor( activeDevice ) / ( Reduction_maxThreadsPerBlock * Reduction_registersPerThread ) ), - //desGridSize( blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), - desGridSize( Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), + //desGridSize( blocksdPerMultiprocessor * Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ) ), + desGridSize( Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ) ), originalSize( size ) { } @@ -402,7 +403,7 @@ struct CudaReductionKernelLauncher #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize.x = Reduction_maxThreadsPerBlock; - gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); + gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); // when there is only one warp per blockSize.x, we need to allocate two warps // worth of shared memory so that we don't index shared memory out of bounds @@ -510,7 +511,7 @@ struct CudaReductionKernelLauncher #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize.x = Reduction_maxThreadsPerBlock; - gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); + gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); // when there is only one warp per blockSize.x, we need to allocate two warps // worth of shared memory so that we don't index shared memory out of bounds diff --git a/src/TNL/Containers/Algorithms/CudaScanKernel.h b/src/TNL/Containers/Algorithms/CudaScanKernel.h index a8c354875..5b2016439 100644 --- a/src/TNL/Containers/Algorithms/CudaScanKernel.h +++ 
b/src/TNL/Containers/Algorithms/CudaScanKernel.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include @@ -36,8 +36,8 @@ cudaFirstPhaseBlockScan( const ScanType scanType, Real* output, Real* auxArray ) { - Real* sharedData = TNL::Devices::Cuda::getSharedMemory< Real >(); - Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ]; + Real* sharedData = TNL::Cuda::getSharedMemory< Real >(); + Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Cuda::getNumberOfSharedMemoryBanks() + 2 ]; Real* warpSums = &auxData[ blockDim.x ]; const Index lastElementIdx = size - blockIdx.x * elementsInBlock; @@ -54,7 +54,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType, sharedData[ 0 ] = zero; while( idx < elementsInBlock && blockOffset + idx < size ) { - sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; + sharedData[ Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; idx += blockDim.x; } } @@ -62,7 +62,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType, { while( idx < elementsInBlock && blockOffset + idx < size ) { - sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; + sharedData[ Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; idx += blockDim.x; } } @@ -78,33 +78,33 @@ cudaFirstPhaseBlockScan( const ScanType scanType, if( chunkOffset < lastElementInBlock ) { auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ]; + sharedData[ Cuda::getInterleaving( chunkOffset ) ]; } int chunkPointer = 1; while( chunkPointer < chunkSize && chunkOffset + chunkPointer < lastElementInBlock ) { - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ] = - reduction( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); + sharedData[ Cuda::getInterleaving( 
chunkOffset + chunkPointer ) ] = + reduction( sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ], + sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; + sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; chunkPointer++; } /*** * Perform the parallel prefix-sum inside warps. */ - const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize(); - const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) { + const int threadInWarpIdx = threadIdx.x % Cuda::getWarpSize(); + const int warpIdx = threadIdx.x / Cuda::getWarpSize(); + for( int stride = 1; stride < Cuda::getWarpSize(); stride *= 2 ) { if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) auxData[ threadIdx.x ] = reduction( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); __syncwarp(); } - if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) + if( threadInWarpIdx == Cuda::getWarpSize() - 1 ) warpSums[ warpIdx ] = auxData[ threadIdx.x ]; __syncthreads(); @@ -112,7 +112,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType, * Compute prefix-sum of warp sums using one warp */ if( warpIdx == 0 ) - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) { + for( int stride = 1; stride < Cuda::getWarpSize(); stride *= 2 ) { if( threadInWarpIdx >= stride ) warpSums[ threadIdx.x ] = reduction( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); __syncwarp(); @@ -136,9 +136,9 @@ cudaFirstPhaseBlockScan( const ScanType scanType, Real chunkShift( zero ); if( chunkIdx > 0 ) chunkShift = auxData[ chunkIdx - 1 ]; - sharedData[ Devices::Cuda::getInterleaving( idx ) ] = - reduction( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); - output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; + 
sharedData[ Cuda::getInterleaving( idx ) ] = + reduction( sharedData[ Cuda::getInterleaving( idx ) ], chunkShift ); + output[ blockOffset + idx ] = sharedData[ Cuda::getInterleaving( idx ) ]; idx += blockDim.x; } __syncthreads(); @@ -147,11 +147,11 @@ cudaFirstPhaseBlockScan( const ScanType scanType, { if( scanType == ScanType::Exclusive ) { - auxArray[ blockIdx.x ] = reduction( sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ], - sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); + auxArray[ blockIdx.x ] = reduction( sharedData[ Cuda::getInterleaving( lastElementInBlock - 1 ) ], + sharedData[ Cuda::getInterleaving( lastElementInBlock ) ] ); } else - auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ]; + auxArray[ blockIdx.x ] = sharedData[ Cuda::getInterleaving( lastElementInBlock - 1 ) ]; } } @@ -245,7 +245,7 @@ struct CudaScanKernelLauncher // compute the number of grids const int elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); + const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; // allocate array for the block sums @@ -268,8 +268,8 @@ struct CudaScanKernelLauncher // run the kernel const std::size_t sharedDataSize = elementsInBlock + - elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; - const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( Real ); + elementsInBlock / Cuda::getNumberOfSharedMemoryBanks() + 2; + const std::size_t sharedMemory = ( sharedDataSize + blockSize + Cuda::getWarpSize() ) * sizeof( Real ); cudaFirstPhaseBlockScan<<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( scanType, reduction, @@ -330,7 +330,7 @@ struct CudaScanKernelLauncher // 
compute the number of grids const int elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); + const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); // loop over all grids for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) { @@ -369,13 +369,13 @@ struct CudaScanKernelLauncher */ static int& maxGridSize() { - static int maxGridSize = Devices::Cuda::getMaxGridSize(); + static int maxGridSize = Cuda::getMaxGridSize(); return maxGridSize; } static void resetMaxGridSize() { - maxGridSize() = Devices::Cuda::getMaxGridSize(); + maxGridSize() = Cuda::getMaxGridSize(); } static int& gridsCount() diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h b/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h index fe7a0fb38..9be47980d 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h index 95ffade9f..efae4f051 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/ndarray/SizesHolder.h b/src/TNL/Containers/ndarray/SizesHolder.h index c3334e19b..72d61bf81 100644 --- a/src/TNL/Containers/ndarray/SizesHolder.h +++ b/src/TNL/Containers/ndarray/SizesHolder.h @@ -13,7 +13,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/TNL/Cuda/CheckDevice.h b/src/TNL/Cuda/CheckDevice.h new file mode 100644 index 000000000..c857d8dd6 
--- /dev/null +++ b/src/TNL/Cuda/CheckDevice.h @@ -0,0 +1,40 @@ +/*************************************************************************** + CheckDevice.h - description + ------------------- + begin : Aug 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { +namespace Cuda { + +#ifdef HAVE_CUDA + /**** + * I do not know why, but it is more reliable to pass the error code instead + * of calling cudaGetLastError() inside the function. + * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. + */ + inline void checkDevice( const char* file_name, int line, cudaError error ) + { + if( error != cudaSuccess ) + throw Exceptions::CudaRuntimeError( error, file_name, line ); + } +#else + inline void checkDevice() {} +#endif + +} // namespace Cuda +} // namespace TNL + +#ifdef HAVE_CUDA +#define TNL_CHECK_CUDA_DEVICE ::TNL::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() ) +#else +#define TNL_CHECK_CUDA_DEVICE ::TNL::Cuda::checkDevice() +#endif diff --git a/src/TNL/Devices/CudaCallable.h b/src/TNL/Cuda/CudaCallable.h similarity index 88% rename from src/TNL/Devices/CudaCallable.h rename to src/TNL/Cuda/CudaCallable.h index f63e4e430..5cd3e8fbb 100644 --- a/src/TNL/Devices/CudaCallable.h +++ b/src/TNL/Cuda/CudaCallable.h @@ -12,9 +12,6 @@ // The __cuda_callable__ macro has to be in a separate header file to avoid // infinite loops by the #include directives. -// -// For example, the implementation of Devices::Cuda needs TNL_ASSERT_* -// macros, which need __cuda_callable__ functions. 
/*** * This macro serves for definition of function which are supposed to be called diff --git a/src/TNL/Cuda/DeviceInfo.h b/src/TNL/Cuda/DeviceInfo.h new file mode 100644 index 000000000..d53b46fec --- /dev/null +++ b/src/TNL/Cuda/DeviceInfo.h @@ -0,0 +1,52 @@ +/*************************************************************************** + CudaDeviceInfo.h - description + ------------------- + begin : Jun 21, 2015 + copyright : (C) 2007 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { +namespace Cuda { + +struct DeviceInfo +{ + static int getNumberOfDevices(); + + static int getActiveDevice(); + + static String getDeviceName( int deviceNum ); + + static int getArchitectureMajor( int deviceNum ); + + static int getArchitectureMinor( int deviceNum ); + + static int getClockRate( int deviceNum ); + + static std::size_t getGlobalMemory( int deviceNum ); + + static std::size_t getFreeGlobalMemory(); + + static int getMemoryClockRate( int deviceNum ); + + static bool getECCEnabled( int deviceNum ); + + static int getCudaMultiprocessors( int deviceNum ); + + static int getCudaCoresPerMultiprocessors( int deviceNum ); + + static int getCudaCores( int deviceNum ); + + static int getRegistersPerMultiprocessor( int deviceNum ); +}; + +} // namespace Cuda +} // namespace TNL + +#include diff --git a/src/TNL/Devices/CudaDeviceInfo_impl.h b/src/TNL/Cuda/DeviceInfo.hpp similarity index 86% rename from src/TNL/Devices/CudaDeviceInfo_impl.h rename to src/TNL/Cuda/DeviceInfo.hpp index f29ecd8c9..d10e6f05c 100644 --- a/src/TNL/Devices/CudaDeviceInfo_impl.h +++ b/src/TNL/Cuda/DeviceInfo.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - CudaDeviceInfo_impl.h - description + DeviceInfo.hpp - description ------------------- begin : Jun 21, 2015 copyright : (C) 
2007 by Tomas Oberhuber @@ -12,14 +12,14 @@ #include -#include +#include #include namespace TNL { -namespace Devices { +namespace Cuda { inline int -CudaDeviceInfo:: +DeviceInfo:: getNumberOfDevices() { #ifdef HAVE_CUDA @@ -32,7 +32,7 @@ getNumberOfDevices() } inline int -CudaDeviceInfo:: +DeviceInfo:: getActiveDevice() { #ifdef HAVE_CUDA @@ -45,7 +45,7 @@ getActiveDevice() } inline String -CudaDeviceInfo:: +DeviceInfo:: getDeviceName( int deviceNum ) { #ifdef HAVE_CUDA @@ -58,7 +58,7 @@ getDeviceName( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getArchitectureMajor( int deviceNum ) { #ifdef HAVE_CUDA @@ -71,7 +71,7 @@ getArchitectureMajor( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getArchitectureMinor( int deviceNum ) { #ifdef HAVE_CUDA @@ -84,7 +84,7 @@ getArchitectureMinor( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getClockRate( int deviceNum ) { #ifdef HAVE_CUDA @@ -96,8 +96,8 @@ getClockRate( int deviceNum ) #endif } -inline size_t -CudaDeviceInfo:: +inline std::size_t +DeviceInfo:: getGlobalMemory( int deviceNum ) { #ifdef HAVE_CUDA @@ -109,13 +109,13 @@ getGlobalMemory( int deviceNum ) #endif } -inline size_t -CudaDeviceInfo:: +inline std::size_t +DeviceInfo:: getFreeGlobalMemory() { #ifdef HAVE_CUDA - size_t free = 0; - size_t total = 0; + std::size_t free = 0; + std::size_t total = 0; cudaMemGetInfo( &free, &total ); return free; #else @@ -124,7 +124,7 @@ getFreeGlobalMemory() } inline int -CudaDeviceInfo:: +DeviceInfo:: getMemoryClockRate( int deviceNum ) { #ifdef HAVE_CUDA @@ -137,7 +137,7 @@ getMemoryClockRate( int deviceNum ) } inline bool -CudaDeviceInfo:: +DeviceInfo:: getECCEnabled( int deviceNum ) { #ifdef HAVE_CUDA @@ -150,7 +150,7 @@ getECCEnabled( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getCudaMultiprocessors( int deviceNum ) { #ifdef HAVE_CUDA @@ -169,12 +169,12 @@ getCudaMultiprocessors( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: 
getCudaCoresPerMultiprocessors( int deviceNum ) { #ifdef HAVE_CUDA - int major = CudaDeviceInfo::getArchitectureMajor( deviceNum ); - int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum ); + int major = DeviceInfo::getArchitectureMajor( deviceNum ); + int minor = DeviceInfo::getArchitectureMinor( deviceNum ); switch( major ) { case 1: // Tesla generation, G80, G8x, G9x classes @@ -209,19 +209,19 @@ getCudaCoresPerMultiprocessors( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getCudaCores( int deviceNum ) { #ifdef HAVE_CUDA - return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) * - CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum ); + return DeviceInfo::getCudaMultiprocessors( deviceNum ) * + DeviceInfo::getCudaCoresPerMultiprocessors( deviceNum ); #else throw Exceptions::CudaSupportMissing(); #endif } inline int -CudaDeviceInfo:: +DeviceInfo:: getRegistersPerMultiprocessor( int deviceNum ) { #ifdef HAVE_CUDA @@ -239,5 +239,5 @@ getRegistersPerMultiprocessor( int deviceNum ) #endif } -} // namespace Devices +} // namespace Cuda } // namespace TNL diff --git a/src/TNL/Cuda/LaunchHelpers.h b/src/TNL/Cuda/LaunchHelpers.h new file mode 100644 index 000000000..aaca4a67d --- /dev/null +++ b/src/TNL/Cuda/LaunchHelpers.h @@ -0,0 +1,162 @@ +/*************************************************************************** + LaunchHelpers.h - description + ------------------- + begin : Aug 19, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { +namespace Cuda { + +inline constexpr int getMaxGridSize() +{ + return 65535; +} + +inline constexpr int getMaxBlockSize() +{ + return 1024; +} + +inline constexpr int getWarpSize() +{ + return 32; +} + +#ifdef HAVE_CUDA +__device__ inline int getGlobalThreadIdx( const int gridIdx = 0, + const int gridSize = getMaxGridSize() ) +{ + return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x; +} + +__device__ inline int getGlobalThreadIdx_x( const dim3& gridIdx ) +{ + return ( gridIdx.x * getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; +} + +__device__ inline int getGlobalThreadIdx_y( const dim3& gridIdx ) +{ + return ( gridIdx.y * getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; +} + +__device__ inline int getGlobalThreadIdx_z( const dim3& gridIdx ) +{ + return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z; +} +#endif + +inline int getNumberOfBlocks( const int threads, + const int blockSize ) +{ + return roundUpDivision( threads, blockSize ); +} + +inline int getNumberOfGrids( const int blocks, + const int gridSize = getMaxGridSize() ) +{ + return roundUpDivision( blocks, gridSize ); +} + +#ifdef HAVE_CUDA +inline void setupThreads( const dim3& blockSize, + dim3& blocksCount, + dim3& gridsCount, + long long int xThreads, + long long int yThreads = 0, + long long int zThreads = 0 ) +{ + blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) ); + blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) ); + blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) ); + + /**** + * TODO: Fix the following: + * I do not known how to get max grid size in kernels :( + * + * Also, this is very slow. 
*/ + /*int currentDevice( 0 ); + cudaGetDevice( currentDevice ); + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, currentDevice ); + gridsCount.x = blocksCount.x / properties.maxGridSize[ 0 ] + ( blocksCount.x % properties.maxGridSize[ 0 ] != 0 ); + gridsCount.y = blocksCount.y / properties.maxGridSize[ 1 ] + ( blocksCount.y % properties.maxGridSize[ 1 ] != 0 ); + gridsCount.z = blocksCount.z / properties.maxGridSize[ 2 ] + ( blocksCount.z % properties.maxGridSize[ 2 ] != 0 ); + */ + gridsCount.x = blocksCount.x / getMaxGridSize() + ( blocksCount.x % getMaxGridSize() != 0 ); + gridsCount.y = blocksCount.y / getMaxGridSize() + ( blocksCount.y % getMaxGridSize() != 0 ); + gridsCount.z = blocksCount.z / getMaxGridSize() + ( blocksCount.z % getMaxGridSize() != 0 ); +} + +inline void setupGrid( const dim3& blocksCount, + const dim3& gridsCount, + const dim3& gridIdx, + dim3& gridSize ) +{ + /* TODO: this is extremely slow!!!! + int currentDevice( 0 ); + cudaGetDevice( &currentDevice ); + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, currentDevice );*/ + + /**** + * TODO: fix the following + if( gridIdx.x < gridsCount.x ) + gridSize.x = properties.maxGridSize[ 0 ]; + else + gridSize.x = blocksCount.x % properties.maxGridSize[ 0 ]; + + if( gridIdx.y < gridsCount.y ) + gridSize.y = properties.maxGridSize[ 1 ]; + else + gridSize.y = blocksCount.y % properties.maxGridSize[ 1 ]; + + if( gridIdx.z < gridsCount.z ) + gridSize.z = properties.maxGridSize[ 2 ]; + else + gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/ + + if( gridIdx.x < gridsCount.x - 1 ) + gridSize.x = getMaxGridSize(); + else + gridSize.x = blocksCount.x % getMaxGridSize(); + + if( gridIdx.y < gridsCount.y - 1 ) + gridSize.y = getMaxGridSize(); + else + gridSize.y = blocksCount.y % getMaxGridSize(); + + if( gridIdx.z < gridsCount.z - 1 ) + gridSize.z = getMaxGridSize(); + else + gridSize.z = blocksCount.z % getMaxGridSize(); +} + +inline std::ostream& operator<<( 
std::ostream& str, const dim3& d ) +{ + str << "( " << d.x << ", " << d.y << ", " << d.z << " )"; + return str; +} + +inline void printThreadsSetup( const dim3& blockSize, + const dim3& blocksCount, + const dim3& gridSize, + const dim3& gridsCount, + std::ostream& str = std::cout ) +{ + str << "Block size: " << blockSize << std::endl + << " Blocks count: " << blocksCount << std::endl + << " Grid size: " << gridSize << std::endl + << " Grids count: " << gridsCount << std::endl; +} +#endif + +} // namespace Cuda +} // namespace TNL diff --git a/src/TNL/Cuda/MemoryHelpers.h b/src/TNL/Cuda/MemoryHelpers.h new file mode 100644 index 000000000..cb214f5d0 --- /dev/null +++ b/src/TNL/Cuda/MemoryHelpers.h @@ -0,0 +1,103 @@ +/*************************************************************************** + MemoryHelpers.h - description + ------------------- + begin : Aug 19, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +#include +#include +#include + +namespace TNL { +namespace Cuda { + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations should be used instead.")]] +ObjectType* passToDevice( const ObjectType& object ) +{ +#ifdef HAVE_CUDA + ObjectType* deviceObject; + if( cudaMalloc( ( void** ) &deviceObject, + ( size_t ) sizeof( ObjectType ) ) != cudaSuccess ) + throw Exceptions::CudaBadAlloc(); + if( cudaMemcpy( ( void* ) deviceObject, + ( void* ) &object, + sizeof( ObjectType ), + cudaMemcpyHostToDevice ) != cudaSuccess ) + { + TNL_CHECK_CUDA_DEVICE; + cudaFree( ( void* ) deviceObject ); + TNL_CHECK_CUDA_DEVICE; + return 0; + } + return deviceObject; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations should be used instead.")]] +ObjectType passFromDevice( const 
ObjectType* object ) +{ +#ifdef HAVE_CUDA + ObjectType aux; + cudaMemcpy( ( void* ) aux, + ( void* ) &object, + sizeof( ObjectType ), + cudaMemcpyDeviceToHost ); + TNL_CHECK_CUDA_DEVICE; + return aux; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations should be used instead.")]] +void passFromDevice( const ObjectType* deviceObject, + ObjectType& hostObject ) +{ +#ifdef HAVE_CUDA + cudaMemcpy( ( void* ) &hostObject, + ( void* ) deviceObject, + sizeof( ObjectType ), + cudaMemcpyDeviceToHost ); + TNL_CHECK_CUDA_DEVICE; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations should be used instead.")]] +void freeFromDevice( ObjectType* deviceObject ) +{ +#ifdef HAVE_CUDA + cudaFree( ( void* ) deviceObject ); + TNL_CHECK_CUDA_DEVICE; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +void print( const ObjectType* deviceObject, std::ostream& str = std::cout ) +{ +#ifdef HAVE_CUDA + ObjectType hostObject; + passFromDevice( deviceObject, hostObject ); + str << hostObject; +#endif +} + +} // namespace Cuda +} // namespace TNL diff --git a/src/TNL/CudaSharedMemory.h b/src/TNL/Cuda/SharedMemory.h similarity index 78% rename from src/TNL/CudaSharedMemory.h rename to src/TNL/Cuda/SharedMemory.h index ec9a43c20..29851952c 100644 --- a/src/TNL/CudaSharedMemory.h +++ b/src/TNL/Cuda/SharedMemory.h @@ -1,5 +1,5 @@ /*************************************************************************** - CudaSharedMemory.h - description + SharedMemory.h - description ------------------- begin : Oct 18, 2017 copyright : (C) 2017 by Tomas Oberhuber et al. 
@@ -26,11 +26,11 @@ * * Until CUDA 8.0, it was possible to use reinterpret_cast this way: * - * template< typename Element, size_t Alignment > - * __device__ Element* Cuda::getSharedMemory() + * template< typename T, size_t Alignment > + * __device__ T* getSharedMemory() * { * extern __shared__ __align__ ( Alignment ) unsigned char __sdata[]; - * return reinterpret_cast< Element* >( __sdata ); + * return reinterpret_cast< T* >( __sdata ); * } * * But since CUDA 9.0 there is a new restriction that the alignment of the @@ -44,12 +44,13 @@ #include namespace TNL { +namespace Cuda { template< typename T, std::size_t _alignment = CHAR_BIT * sizeof(T) > -struct CudaSharedMemory {}; +struct SharedMemory; template< typename T > -struct CudaSharedMemory< T, 8 > +struct SharedMemory< T, 8 > { __device__ inline operator T* () { @@ -65,7 +66,7 @@ struct CudaSharedMemory< T, 8 > }; template< typename T > -struct CudaSharedMemory< T, 16 > +struct SharedMemory< T, 16 > { __device__ inline operator T* () { @@ -81,7 +82,7 @@ struct CudaSharedMemory< T, 16 > }; template< typename T > -struct CudaSharedMemory< T, 32 > +struct SharedMemory< T, 32 > { __device__ inline operator T* () { @@ -97,7 +98,7 @@ struct CudaSharedMemory< T, 32 > }; template< typename T > -struct CudaSharedMemory< T, 64 > +struct SharedMemory< T, 64 > { __device__ inline operator T* () { @@ -112,6 +113,25 @@ struct CudaSharedMemory< T, 64 > } }; +template< typename T > +__device__ inline T* getSharedMemory() +{ + return SharedMemory< T >{}; +} + +// helper functions for indexing shared memory +inline constexpr int getNumberOfSharedMemoryBanks() +{ + return 32; +} + +template< typename Index > +__device__ Index getInterleaving( const Index index ) +{ + return index + index / Cuda::getNumberOfSharedMemoryBanks(); +} + +} // namespace Cuda } // namespace TNL #endif diff --git a/src/TNL/CudaStreamPool.h b/src/TNL/Cuda/StreamPool.h similarity index 73% rename from src/TNL/CudaStreamPool.h rename to 
src/TNL/Cuda/StreamPool.h index 1dd2b7907..59bf38a57 100644 --- a/src/TNL/CudaStreamPool.h +++ b/src/TNL/Cuda/StreamPool.h @@ -1,5 +1,5 @@ /*************************************************************************** - CudaStreamPool.h - description + StreamPool.h - description ------------------- begin : Oct 14, 2016 copyright : (C) 2016 by Tomas Oberhuber et al. @@ -15,22 +15,20 @@ #include #include -#include -#include - namespace TNL { +namespace Cuda { #ifdef HAVE_CUDA -class CudaStreamPool +class StreamPool { public: // stop the compiler generating methods of copy the object - CudaStreamPool( CudaStreamPool const& copy ) = delete; - CudaStreamPool& operator=( CudaStreamPool const& copy ) = delete; + StreamPool( StreamPool const& copy ) = delete; + StreamPool& operator=( StreamPool const& copy ) = delete; - inline static CudaStreamPool& getInstance() + inline static StreamPool& getInstance() { - static CudaStreamPool instance; + static StreamPool instance; return instance; } @@ -47,14 +45,14 @@ class CudaStreamPool private: // private constructor of the singleton - inline CudaStreamPool() + inline StreamPool() { - atexit( CudaStreamPool::free_atexit ); + atexit( StreamPool::free_atexit ); } inline static void free_atexit( void ) { - CudaStreamPool::getInstance().free(); + StreamPool::getInstance().free(); } protected: @@ -70,5 +68,6 @@ class CudaStreamPool }; #endif +} // namespace Cuda } // namespace TNL diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 122c466c9..853cd2e03 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -31,131 +31,8 @@ public: static inline bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); - __cuda_callable__ static inline constexpr int getMaxGridSize(); - - __cuda_callable__ static inline constexpr int getMaxBlockSize(); - - __cuda_callable__ static inline constexpr int getWarpSize(); - 
- __cuda_callable__ static inline constexpr int getNumberOfSharedMemoryBanks(); - static inline constexpr int getGPUTransferBufferSize(); -#ifdef HAVE_CUDA - /*** - * This function is obsolete and should be replaced by the following functions. - */ - __device__ static inline int - getGlobalThreadIdx( const int gridIdx = 0, - const int gridSize = getMaxGridSize() ); - - __device__ static inline int - getGlobalThreadIdx_x( const dim3& gridIdx ); - - __device__ static inline int - getGlobalThreadIdx_y( const dim3& gridIdx ); - - __device__ static inline int - getGlobalThreadIdx_z( const dim3& gridIdx ); -#endif - - /**** - * This functions helps to count number of CUDA blocks depending on the - * number of the CUDA threads and the block size. - * It is obsolete and it will be replaced by setupThreads. - */ - static inline int getNumberOfBlocks( const int threads, - const int blockSize ); - - /**** - * This functions helps to count number of CUDA grids depending on the - * number of the CUDA blocks and maximum grid size. - * It is obsolete and it will be replaced by setupThreads. - */ - static inline int getNumberOfGrids( const int blocks, - const int gridSize = getMaxGridSize() ); - -#ifdef HAVE_CUDA - /*! This method sets up gridSize and computes number of grids depending - * on total number of CUDA threads. - */ - static void setupThreads( const dim3& blockSize, - dim3& blocksCount, - dim3& gridsCount, - long long int xThreads, - long long int yThreads = 0, - long long int zThreads = 0 ); - - /*! This method sets up grid size when one iterates over more grids. - * If gridIdx.? < gridsCount.? then the gridSize.? is set to maximum - * allowed by CUDA. Otherwise gridSize.? is set to the size of the grid - * in the last loop i.e. blocksCount.? % maxGridSize.?. 
- */ - static void setupGrid( const dim3& blocksCount, - const dim3& gridsCount, - const dim3& gridIdx, - dim3& gridSize ); - - static void printThreadsSetup( const dim3& blockSize, - const dim3& blocksCount, - const dim3& gridSize, - const dim3& gridsCount, - std::ostream& str = std::cout ); -#endif - - template< typename ObjectType > - static ObjectType* passToDevice( const ObjectType& object ); - - template< typename ObjectType > - static ObjectType passFromDevice( const ObjectType* object ); - - template< typename ObjectType > - static void passFromDevice( const ObjectType* deviceObject, - ObjectType& hostObject ); - - template< typename ObjectType > - static void freeFromDevice( ObjectType* object ); - - template< typename ObjectType > - static void print( const ObjectType* object, std::ostream& str = std::cout ); - -#ifdef HAVE_CUDA - template< typename Index > - static __device__ Index getInterleaving( const Index index ); - - /**** - * Declaration of variables for dynamic shared memory is difficult in - * templated functions. For example, the following does not work for - * different types T: - * - * template< typename T > - * void foo() - * { - * extern __shared__ T shx[]; - * } - * - * This is because extern variables must be declared exactly once. In - * templated functions we need to have same variable name with different - * type, which causes the conflict. In CUDA samples they solve the problem - * using template specialization via classes, but using one base type and - * reinterpret_cast works too. - * See http://stackoverflow.com/a/19339004/4180822 for reference. - */ - template< typename Element > - static __device__ Element* getSharedMemory(); -#endif - -#ifdef HAVE_CUDA - /**** - * I do not know why, but it is more reliable to pass the error code instead - * of calling cudaGetLastError() inside the method. - * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. 
- */ - static inline void checkDevice( const char* file_name, int line, cudaError error ); -#else - static inline void checkDevice() {} -#endif - static inline void insertSmartPointer( Pointers::SmartPointer* pointer ); static inline void removeSmartPointer( Pointers::SmartPointer* pointer ); @@ -180,18 +57,6 @@ public: static inline Pointers::SmartPointersRegister& getSmartPointersRegister(); }; -#ifdef HAVE_CUDA -#define TNL_CHECK_CUDA_DEVICE ::TNL::Devices::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() ) -#else -#define TNL_CHECK_CUDA_DEVICE ::TNL::Devices::Cuda::checkDevice() -#endif - -#ifdef HAVE_CUDA -namespace { - std::ostream& operator << ( std::ostream& str, const dim3& d ); -} -#endif - #ifdef HAVE_CUDA #if __CUDA_ARCH__ < 600 namespace { diff --git a/src/TNL/Devices/CudaDeviceInfo.h b/src/TNL/Devices/CudaDeviceInfo.h deleted file mode 100644 index 9eefe3bad..000000000 --- a/src/TNL/Devices/CudaDeviceInfo.h +++ /dev/null @@ -1,56 +0,0 @@ -/*************************************************************************** - CudaDeviceInfo.h - description - ------------------- - begin : Jun 21, 2015 - copyright : (C) 2007 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -#include - -namespace TNL { -namespace Devices { - -class CudaDeviceInfo -{ - public: - - static int getNumberOfDevices(); - - static int getActiveDevice(); - - static String getDeviceName( int deviceNum ); - - static int getArchitectureMajor( int deviceNum ); - - static int getArchitectureMinor( int deviceNum ); - - static int getClockRate( int deviceNum ); - - static size_t getGlobalMemory( int deviceNum ); - - static size_t getFreeGlobalMemory(); - - static int getMemoryClockRate( int deviceNum ); - - static bool getECCEnabled( int deviceNum ); - - static int getCudaMultiprocessors( int deviceNum ); - - static int 
getCudaCoresPerMultiprocessors( int deviceNum ); - - static int getCudaCores( int deviceNum ); - - static int getRegistersPerMultiprocessor( int deviceNum ); -}; - -} // namespace Devices -} // namespace TNL - -#include diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index b758584bd..6d3daa356 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -12,11 +12,10 @@ #include #include -#include +#include #include #include #include -#include namespace TNL { namespace Devices { @@ -49,279 +48,30 @@ Cuda::setup( const Config::ParameterContainer& parameters, return true; } -__cuda_callable__ -inline constexpr int Cuda::getMaxGridSize() -{ - return 65535; -} - -__cuda_callable__ -inline constexpr int Cuda::getMaxBlockSize() -{ - return 1024; -} - -__cuda_callable__ -inline constexpr int Cuda::getWarpSize() -{ - return 32; -} - -__cuda_callable__ -inline constexpr int Cuda::getNumberOfSharedMemoryBanks() -{ - return 32; -} - inline constexpr int Cuda::getGPUTransferBufferSize() { return 1 << 20; } -#ifdef HAVE_CUDA -__device__ inline int Cuda::getGlobalThreadIdx( const int gridIdx, const int gridSize ) -{ - return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x; -} - -__device__ inline int Cuda::getGlobalThreadIdx_x( const dim3& gridIdx ) -{ - return ( gridIdx.x * getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; -} - -__device__ inline int Cuda::getGlobalThreadIdx_y( const dim3& gridIdx ) -{ - return ( gridIdx.y * getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; -} - -__device__ inline int Cuda::getGlobalThreadIdx_z( const dim3& gridIdx ) -{ - return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z; -} -#endif - -inline int Cuda::getNumberOfBlocks( const int threads, - const int blockSize ) -{ - return roundUpDivision( threads, blockSize ); -} - -inline int Cuda::getNumberOfGrids( const int blocks, - const int gridSize ) -{ - return roundUpDivision( blocks, gridSize ); 
-} - -#ifdef HAVE_CUDA -inline void Cuda::setupThreads( const dim3& blockSize, - dim3& blocksCount, - dim3& gridsCount, - long long int xThreads, - long long int yThreads, - long long int zThreads ) -{ - blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) ); - blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) ); - blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) ); - - /**** - * TODO: Fix the following: - * I do not known how to get max grid size in kernels :( - * - * Also, this is very slow. */ - /*int currentDevice( 0 ); - cudaGetDevice( currentDevice ); - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, currentDevice ); - gridsCount.x = blocksCount.x / properties.maxGridSize[ 0 ] + ( blocksCount.x % properties.maxGridSize[ 0 ] != 0 ); - gridsCount.y = blocksCount.y / properties.maxGridSize[ 1 ] + ( blocksCount.y % properties.maxGridSize[ 1 ] != 0 ); - gridsCount.z = blocksCount.z / properties.maxGridSize[ 2 ] + ( blocksCount.z % properties.maxGridSize[ 2 ] != 0 ); - */ - gridsCount.x = blocksCount.x / getMaxGridSize() + ( blocksCount.x % getMaxGridSize() != 0 ); - gridsCount.y = blocksCount.y / getMaxGridSize() + ( blocksCount.y % getMaxGridSize() != 0 ); - gridsCount.z = blocksCount.z / getMaxGridSize() + ( blocksCount.z % getMaxGridSize() != 0 ); -} - -inline void Cuda::setupGrid( const dim3& blocksCount, - const dim3& gridsCount, - const dim3& gridIdx, - dim3& gridSize ) -{ - /* TODO: this is extremely slow!!!! 
- int currentDevice( 0 ); - cudaGetDevice( ¤tDevice ); - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, currentDevice );*/ - - /**** - * TODO: fix the following - if( gridIdx.x < gridsCount.x ) - gridSize.x = properties.maxGridSize[ 0 ]; - else - gridSize.x = blocksCount.x % properties.maxGridSize[ 0 ]; - - if( gridIdx.y < gridsCount.y ) - gridSize.y = properties.maxGridSize[ 1 ]; - else - gridSize.y = blocksCount.y % properties.maxGridSize[ 1 ]; - - if( gridIdx.z < gridsCount.z ) - gridSize.z = properties.maxGridSize[ 2 ]; - else - gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/ - - if( gridIdx.x < gridsCount.x - 1 ) - gridSize.x = getMaxGridSize(); - else - gridSize.x = blocksCount.x % getMaxGridSize(); - - if( gridIdx.y < gridsCount.y - 1 ) - gridSize.y = getMaxGridSize(); - else - gridSize.y = blocksCount.y % getMaxGridSize(); - - if( gridIdx.z < gridsCount.z - 1 ) - gridSize.z = getMaxGridSize(); - else - gridSize.z = blocksCount.z % getMaxGridSize(); -} - -inline void Cuda::printThreadsSetup( const dim3& blockSize, - const dim3& blocksCount, - const dim3& gridSize, - const dim3& gridsCount, - std::ostream& str ) -{ - str << "Block size: " << blockSize << std::endl - << " Blocks count: " << blocksCount << std::endl - << " Grid size: " << gridSize << std::endl - << " Grids count: " << gridsCount << std::endl; -} -#endif - - -template< typename ObjectType > -ObjectType* Cuda::passToDevice( const ObjectType& object ) -{ -#ifdef HAVE_CUDA - ObjectType* deviceObject; - if( cudaMalloc( ( void** ) &deviceObject, - ( size_t ) sizeof( ObjectType ) ) != cudaSuccess ) - throw Exceptions::CudaBadAlloc(); - if( cudaMemcpy( ( void* ) deviceObject, - ( void* ) &object, - sizeof( ObjectType ), - cudaMemcpyHostToDevice ) != cudaSuccess ) - { - TNL_CHECK_CUDA_DEVICE; - cudaFree( ( void* ) deviceObject ); - TNL_CHECK_CUDA_DEVICE; - return 0; - } - return deviceObject; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< 
typename ObjectType > -ObjectType Cuda::passFromDevice( const ObjectType* object ) -{ -#ifdef HAVE_CUDA - ObjectType aux; - cudaMemcpy( ( void* ) aux, - ( void* ) &object, - sizeof( ObjectType ), - cudaMemcpyDeviceToHost ); - TNL_CHECK_CUDA_DEVICE; - return aux; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename ObjectType > -void Cuda::passFromDevice( const ObjectType* deviceObject, - ObjectType& hostObject ) -{ -#ifdef HAVE_CUDA - cudaMemcpy( ( void* ) &hostObject, - ( void* ) deviceObject, - sizeof( ObjectType ), - cudaMemcpyDeviceToHost ); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename ObjectType > -void Cuda::print( const ObjectType* deviceObject, std::ostream& str ) -{ -#ifdef HAVE_CUDA - ObjectType hostObject; - passFromDevice( deviceObject, hostObject ); - str << hostObject; -#endif -} - - -template< typename ObjectType > -void Cuda::freeFromDevice( ObjectType* deviceObject ) -{ -#ifdef HAVE_CUDA - cudaFree( ( void* ) deviceObject ); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -#ifdef HAVE_CUDA -template< typename Index > -__device__ Index Cuda::getInterleaving( const Index index ) -{ - return index + index / Cuda::getNumberOfSharedMemoryBanks(); -} - -template< typename Element > -__device__ Element* Cuda::getSharedMemory() -{ - return CudaSharedMemory< Element >(); -} -#endif - -#ifdef HAVE_CUDA -inline void Cuda::checkDevice( const char* file_name, int line, cudaError error ) -{ - if( error != cudaSuccess ) - throw Exceptions::CudaRuntimeError( error, file_name, line ); -} -#endif - inline void Cuda::insertSmartPointer( Pointers::SmartPointer* pointer ) { - getSmartPointersRegister().insert( pointer, Devices::CudaDeviceInfo::getActiveDevice() ); + getSmartPointersRegister().insert( pointer, TNL::Cuda::DeviceInfo::getActiveDevice() ); } inline void Cuda::removeSmartPointer( Pointers::SmartPointer* pointer ) { - 
getSmartPointersRegister().remove( pointer, Devices::CudaDeviceInfo::getActiveDevice() ); + getSmartPointersRegister().remove( pointer, TNL::Cuda::DeviceInfo::getActiveDevice() ); } inline bool Cuda::synchronizeDevice( int deviceId ) { #ifdef HAVE_CUDA -#ifdef HAVE_CUDA_UNIFIED_MEMORY - return true; -#else if( deviceId < 0 ) - deviceId = Devices::CudaDeviceInfo::getActiveDevice(); + deviceId = TNL::Cuda::DeviceInfo::getActiveDevice(); getSmartPointersSynchronizationTimer().start(); bool b = getSmartPointersRegister().synchronizeDevice( deviceId ); getSmartPointersSynchronizationTimer().stop(); return b; -#endif #else return true; #endif @@ -339,16 +89,6 @@ inline Pointers::SmartPointersRegister& Cuda::getSmartPointersRegister() return reg; } -#ifdef HAVE_CUDA -namespace { - std::ostream& operator << ( std::ostream& str, const dim3& d ) - { - str << "( " << d.x << ", " << d.y << ", " << d.z << " )"; - return str; - } -} -#endif - // double-precision atomicAdd function for Maxwell and older GPUs // copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions #ifdef HAVE_CUDA diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h index 55129c4e1..4dac64a23 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h @@ -22,7 +22,7 @@ initInterface( const MeshFunctionPointer& _input, const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); Devices::Cuda::synchronizeDevice(); diff 
--git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h index cddf4f9cb..947a4be06 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h @@ -25,8 +25,8 @@ initInterface( const MeshFunctionPointer& _input, const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h index 32548abcf..eb0665c7e 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h @@ -23,9 +23,9 @@ initInterface( const MeshFunctionPointer& _input, const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 8 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( 
mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h index f2f033ccb..52c2ebbee 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h @@ -105,7 +105,7 @@ solve( const MeshPointer& mesh, // TODO: CUDA code #ifdef HAVE_CUDA const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index e5638c11d..c5a0f74cc 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -251,8 +251,8 @@ solve( const MeshPointer& mesh, const int cudaBlockSize( 16 ); // Setting number of threads and blocks for kernel - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - 
vecUpperOverlaps[0], cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 325b626f7..3fce5564e 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -263,9 +263,9 @@ solve( const MeshPointer& mesh, const int cudaBlockSize( 8 ); // Getting the number of blocks in grid in each direction (without overlaps bcs we dont calculate on overlaps) - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z() - vecLowerOverlaps[2] - vecUpperOverlaps[2], cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); + int numBlocksZ = Cuda::getNumberOfBlocks( mesh->getDimensions().z() - vecLowerOverlaps[2] - vecUpperOverlaps[2], cudaBlockSize ); if( cudaBlockSize * 
cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index 19a9eaa06..a3eb66066 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/src/TNL/Functions/FunctionAdapter.h b/src/TNL/Functions/FunctionAdapter.h index b9c358866..b763ee476 100644 --- a/src/TNL/Functions/FunctionAdapter.h +++ b/src/TNL/Functions/FunctionAdapter.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include diff --git a/src/TNL/Functions/OperatorFunction.h b/src/TNL/Functions/OperatorFunction.h index 1f1e89b02..cc46d557a 100644 --- a/src/TNL/Functions/OperatorFunction.h +++ b/src/TNL/Functions/OperatorFunction.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h index e2bdce1f1..918f24107 100644 --- a/src/TNL/Functions/TestFunction_impl.h +++ b/src/TNL/Functions/TestFunction_impl.h @@ -11,6 +11,8 @@ #pragma once #include +#include + #include #include #include @@ -137,7 +139,7 @@ setupFunction( const Config::ParameterContainer& parameters, } if( std::is_same< Device, Devices::Cuda >::value ) { - this->function = Devices::Cuda::passToDevice( *auxFunction ); + this->function = Cuda::passToDevice( *auxFunction ); delete auxFunction; TNL_CHECK_CUDA_DEVICE; } @@ -166,7 +168,7 @@ setupOperator( const Config::ParameterContainer& parameters, } if( std::is_same< Device, Devices::Cuda >::value ) { - this->operator_ = Devices::Cuda::passToDevice( *auxOperator ); + this->operator_ = Cuda::passToDevice( *auxOperator ); delete auxOperator; TNL_CHECK_CUDA_DEVICE; } @@ -736,7 +738,7 @@ deleteFunction() if( std::is_same< Device, Devices::Cuda >::value ) { if( function ) - 
Devices::Cuda::freeFromDevice( ( FunctionType * ) function ); + Cuda::freeFromDevice( ( FunctionType * ) function ); } } @@ -756,7 +758,7 @@ deleteOperator() if( std::is_same< Device, Devices::Cuda >::value ) { if( operator_ ) - Devices::Cuda::freeFromDevice( ( OperatorType * ) operator_ ); + Cuda::freeFromDevice( ( OperatorType * ) operator_ ); } } @@ -912,7 +914,7 @@ printFunction( std::ostream& str ) const } if( std::is_same< Device, Devices::Cuda >::value ) { - Devices::Cuda::print( f, str ); + Cuda::print( f, str ); return str; } } diff --git a/src/TNL/Logger_impl.h b/src/TNL/Logger_impl.h index 0e1dd8dc6..6a3da0f96 100644 --- a/src/TNL/Logger_impl.h +++ b/src/TNL/Logger_impl.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include namespace TNL { @@ -95,19 +95,19 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) // for( int i = 0; i < devices; i++ ) // { // logger.writeParameter< int >( "Device no.", i, 1 ); - const int i = Devices::CudaDeviceInfo::getActiveDevice(); - writeParameter< String >( "Name", Devices::CudaDeviceInfo::getDeviceName( i ), 2 ); - const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( i ) ) + "." + - convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( i ) ); + const int i = Cuda::DeviceInfo::getActiveDevice(); + writeParameter< String >( "Name", Cuda::DeviceInfo::getDeviceName( i ), 2 ); + const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( i ) ) + "." 
+ + convertToString( Cuda::DeviceInfo::getArchitectureMinor( i ) ); writeParameter< String >( "Architecture", deviceArch, 2 ); - writeParameter< int >( "CUDA cores", Devices::CudaDeviceInfo::getCudaCores( i ), 2 ); - const double clockRate = ( double ) Devices::CudaDeviceInfo::getClockRate( i ) / 1.0e3; + writeParameter< int >( "CUDA cores", Cuda::DeviceInfo::getCudaCores( i ), 2 ); + const double clockRate = ( double ) Cuda::DeviceInfo::getClockRate( i ) / 1.0e3; writeParameter< double >( "Clock rate (in MHz)", clockRate, 2 ); - const double globalMemory = ( double ) Devices::CudaDeviceInfo::getGlobalMemory( i ) / 1.0e9; + const double globalMemory = ( double ) Cuda::DeviceInfo::getGlobalMemory( i ) / 1.0e9; writeParameter< double >( "Global memory (in GB)", globalMemory, 2 ); - const double memoryClockRate = ( double ) Devices::CudaDeviceInfo::getMemoryClockRate( i ) / 1.0e3; + const double memoryClockRate = ( double ) Cuda::DeviceInfo::getMemoryClockRate( i ) / 1.0e3; writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRate, 2 ); - writeParameter< bool >( "ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( i ), 2 ); + writeParameter< bool >( "ECC enabled", Cuda::DeviceInfo::getECCEnabled( i ), 2 ); // } } return true; diff --git a/src/TNL/Math.h b/src/TNL/Math.h index b7591bf65..321cc7ce3 100644 --- a/src/TNL/Math.h +++ b/src/TNL/Math.h @@ -15,7 +15,7 @@ #include #include -#include +#include namespace TNL { diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h index e754eca68..a0f293b3d 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/AdEllpack_impl.h @@ -936,14 +936,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda2( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + 
threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 256; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -984,14 +984,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda4( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 192; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1043,14 +1043,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 128; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1101,14 +1101,14 @@ void 
AdEllpack< Real, Device, Index >::spmvCuda16( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 128; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1159,14 +1159,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda32( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 96; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1292,18 +1292,18 @@ public: { typedef AdEllpack< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* 
kernel_outVector = Cuda::passToDevice( outVector ); if( matrix.totalLoad < 2 ) { - dim3 blockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda2< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1313,20 +1313,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else if( matrix.totalLoad < 4 ) { - dim3 blockSize( 192 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 192 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda4< Real, Index, InVector, OutVector > <<< cudaGridSize, 
blockSize, sharedMemory >>> @@ -1336,20 +1336,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else if( matrix.totalLoad < 8 ) { - dim3 blockSize( 128 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda8< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1359,20 +1359,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else if( matrix.totalLoad < 16 ) { - dim3 blockSize( 128 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); 
for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda16< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1382,20 +1382,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else { - dim3 blockSize( 96 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 96 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda32< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1405,9 +1405,9 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } } diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h 
b/src/TNL/Matrices/BiEllpackSymmetric_impl.h index 47b342828..0af180c0e 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h @@ -1053,7 +1053,7 @@ void BiEllpackSymmetric< Real, Device, Index, StripSize >::spmvCuda( const InVec IndexType bisection = this->warpSize; IndexType groupBegin = strip * ( this->logWarpSize + 1 ); - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ Real results[ cudaBlockSize ]; results[ threadIdx.x ] = 0.0; IndexType elementPtr = ( this->groupPointers[ groupBegin ] << this->logWarpSize ) + inWarpIdx; @@ -1274,7 +1274,7 @@ void BiEllpackSymmetricVectorProductCuda( const BiEllpackSymmetric< Real, Device int gridIdx, const int warpSize ) { - Index globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + Index globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, globalIdx ); } #endif @@ -1394,7 +1394,7 @@ void performRowBubbleSortCuda( BiEllpackSymmetric< Real, Devices::Cuda, Index, S const typename BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize >::RowLengthsVector* rowLengths, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->performRowBubbleSortCudaKernel( *rowLengths, stripIdx ); } #endif @@ -1409,7 +1409,7 @@ void computeColumnSizesCuda( BiEllpackSymmetric< Real, Devices::Cuda, Index, Str const Index numberOfStrips, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; 
matrix->computeColumnSizesCudaKernel( *rowLengths, numberOfStrips, stripIdx ); } #endif @@ -1513,23 +1513,23 @@ public: Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); performRowBubbleSortCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1544,15 +1544,15 @@ public: const Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), 
cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); computeColumnSizesCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -1560,8 +1560,8 @@ public: numberOfStrips, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1579,16 +1579,16 @@ public: #ifdef HAVE_CUDA typedef BiEllpackSymmetric< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = 
roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); BiEllpackSymmetricVectorProductCuda< Real, Index, StripSize, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> @@ -1598,9 +1598,9 @@ public: gridIdx, matrix.warpSize ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index ac0285361..6af808995 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -1057,7 +1057,7 @@ void BiEllpack< Real, Device, Index, StripSize >::spmvCuda( const InVector& inVe IndexType bisection = this->warpSize; IndexType groupBegin = strip * ( this->logWarpSize + 1 ); - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ Real results[ cudaBlockSize ]; results[ threadIdx.x ] = 0.0; IndexType elementPtr = ( this->groupPointers[ groupBegin ] << this->logWarpSize ) + inWarpIdx; @@ -1277,7 +1277,7 @@ void BiEllpackVectorProductCuda( const BiEllpack< Real, Devices::Cuda, Index, St int gridIdx, const int warpSize ) { - Index globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + Index globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, globalIdx ); } #endif @@ -1397,7 +1397,7 @@ void performRowBubbleSortCuda( BiEllpack< Real, Devices::Cuda, Index, 
StripSize const typename BiEllpack< Real, Devices::Cuda, Index, StripSize >::CompressedRowLengthsVector* rowLengths, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->performRowBubbleSortCudaKernel( *rowLengths, stripIdx ); } #endif @@ -1412,7 +1412,7 @@ void computeColumnSizesCuda( BiEllpack< Real, Devices::Cuda, Index, StripSize >* const Index numberOfStrips, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeColumnSizesCudaKernel( *rowLengths, numberOfStrips, stripIdx ); } #endif @@ -1516,23 +1516,23 @@ public: Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpack< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + 
cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); performRowBubbleSortCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1547,15 +1547,15 @@ public: const Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpack< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); computeColumnSizesCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -1563,8 +1563,8 @@ public: numberOfStrips, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1582,16 +1582,16 @@ public: #ifdef HAVE_CUDA typedef BiEllpack< Real, 
Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); BiEllpackVectorProductCuda< Real, Index, StripSize, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> @@ -1601,9 +1601,9 @@ public: gridIdx, matrix.warpSize ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 1e28157f9..86b10119c 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -33,7 +33,7 @@ template< typename Real, typename Index > CSR< Real, Device, Index >::CSR() : spmvCudaKernel( hybrid ), - cudaWarpSize( 32 ), //Devices::Cuda::getWarpSize() ) + 
cudaWarpSize( 32 ), //Cuda::getWarpSize() ) hybridModeSplit( 4 ) { }; @@ -145,16 +145,16 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con // // (gdb) p rowPointers.getElement(0) // // Attempt to take address of value not located in memory. // IndexType resultHost ( 0 ); -// IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost ); +// IndexType* resultCuda = Cuda::passToDevice( resultHost ); // // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown: // // 'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list' // TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately // delete []cols; // delete []vals; // std::cout << "Checkpoint BEFORE passFromDevice" << std::endl; -// resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE +// resultHost = Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE // std::cout << "Checkpoint AFTER passFromDevice" << std::endl; -// Devices::Cuda::freeFromDevice( resultCuda ); +// Cuda::freeFromDevice( resultCuda ); // return resultHost; // } } @@ -713,7 +713,7 @@ void CSR< Real, Device, Index >::spmvCudaVectorized( const InVector& inVector, const IndexType warpEnd, const IndexType inWarpIdx ) const { - volatile Real* aux = Devices::Cuda::getSharedMemory< Real >(); + volatile Real* aux = Cuda::getSharedMemory< Real >(); for( IndexType row = warpStart; row < warpEnd; row++ ) { aux[ threadIdx.x ] = 0.0; @@ -753,7 +753,7 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector, OutVector& outVector, int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + 
blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; const IndexType warpStart = warpSize * ( globalIdx / warpSize ); const IndexType warpEnd = min( warpStart + warpSize, this->getRows() ); const IndexType inWarpIdx = globalIdx % warpSize; @@ -764,7 +764,7 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector, /**** * Hybrid mode */ - const Index firstRow = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x; + const Index firstRow = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x; const IndexType lastRow = min( this->getRows(), firstRow + blockDim. x ); const IndexType nonzerosPerRow = ( this->rowPointers[ lastRow ] - this->rowPointers[ firstRow ] ) / ( lastRow - firstRow ); @@ -828,7 +828,7 @@ __global__ void CSRVectorProductCudaKernel( const CSR< Real, Devices::Cuda, Inde { typedef CSR< Real, Devices::Cuda, Index > Matrix; static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( matrix->getCudaKernelType() == Matrix::scalar ) { if( rowIdx < matrix->getRows() ) @@ -854,17 +854,17 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix, #ifdef HAVE_CUDA typedef CSR< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = 
Cuda::passToDevice( outVector ); TNL_CHECK_CUDA_DEVICE; - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); if( matrix.getCudaWarpSize() == 32 ) CSRVectorProductCudaKernel< Real, Index, InVector, OutVector, 32 > @@ -911,9 +911,9 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix, } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 5bc0cfd27..89e525e87 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -1123,7 +1123,7 @@ __device__ void ChunkedEllpack< Real, Device, Index >::computeSliceVectorProduct { static_assert( std::is_same < DeviceType, Devices::Cuda >::value, "" ); - RealType* chunkProducts = Devices::Cuda::getSharedMemory< RealType >(); + RealType* chunkProducts = Cuda::getSharedMemory< RealType >(); ChunkedEllpackSliceInfo* sliceInfo = ( ChunkedEllpackSliceInfo* ) & chunkProducts[ blockDim.x ]; if( threadIdx.x == 0 ) @@ -1403,7 +1403,7 @@ __global__ void 
ChunkedEllpackVectorProductCudaKernel( const ChunkedEllpack< Rea OutVector* outVector, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() + blockIdx.x; if( sliceIdx < matrix->getNumberOfSlices() ) matrix->computeSliceVectorProduct( inVector, outVector, sliceIdx ); @@ -1456,19 +1456,19 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda > typedef ChunkedEllpack< Real, Devices::Cuda, Index > Matrix; typedef Index IndexType; typedef Real RealType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); dim3 cudaBlockSize( matrix.getNumberOfChunksInSlice() ), - cudaGridSize( Devices::Cuda::getMaxGridSize() ); + cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = matrix.getNumberOfSlices(); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType ) + sizeof( tnlChunkedEllpackSliceInfo< IndexType > ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); ChunkedEllpackVectorProductCudaKernel< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( kernel_this, @@ -1476,9 +1476,9 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda > kernel_outVector, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( 
kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h index f57671197..246bd09ed 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense_impl.h @@ -586,20 +586,20 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); cudaBlockSize.x = cudaBlockColumns; cudaBlockSize.y = cudaBlockRows; - const IndexType rowGrids = roundUpDivision( rowTiles, Devices::Cuda::getMaxGridSize() ); - const IndexType columnGrids = roundUpDivision( columnTiles, Devices::Cuda::getMaxGridSize() ); + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) { - cudaGridSize.x = cudaGridSize.y = Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); if( gridIdx_x == columnGrids - 1 ) - cudaGridSize.x = columnTiles % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); if( gridIdx_y == rowGrids - 1 ) - cudaGridSize.y = rowTiles % Devices::Cuda::getMaxGridSize(); - Dense* this_kernel = Devices::Cuda::passToDevice( *this ); - Matrix1* matrix1_kernel = Devices::Cuda::passToDevice( matrix1 ); - Matrix2* matrix2_kernel = Devices::Cuda::passToDevice( matrix2 ); + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); + Dense* this_kernel = Cuda::passToDevice( *this ); + Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 ); + Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 ); DenseMatrixProductKernel< Real, Index, Matrix1, @@ 
-616,9 +616,9 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, matrix2Multiplicator, gridIdx_x, gridIdx_y ); - Devices::Cuda::freeFromDevice( this_kernel ); - Devices::Cuda::freeFromDevice( matrix1_kernel ); - Devices::Cuda::freeFromDevice( matrix2_kernel ); + Cuda::freeFromDevice( this_kernel ); + Cuda::freeFromDevice( matrix1_kernel ); + Cuda::freeFromDevice( matrix2_kernel ); } #endif } @@ -669,7 +669,7 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind rowBlock < tileDim; rowBlock += tileRowBlockSize ) { - tile[ Devices::Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = inputMatrix->getElementFast( readColumnPosition, readRowPosition + rowBlock ); } @@ -688,7 +688,7 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind { resultMatrix->setElementFast( writeColumnPosition, writeRowPosition + rowBlock, - matrixMultiplicator * tile[ Devices::Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); } @@ -741,7 +741,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, rowBlock += tileRowBlockSize ) { if( readRowPosition + rowBlock < rows ) - tile[ Devices::Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = inputMatrix->getElementFast( readColumnPosition, readRowPosition + rowBlock ); } @@ -765,7 +765,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, if( writeRowPosition + rowBlock < columns ) resultMatrix->setElementFast( writeColumnPosition, writeRowPosition + rowBlock, - matrixMultiplicator * tile[ Devices::Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + 
threadIdx.x ) ] ); + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); } } @@ -809,21 +809,21 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); cudaBlockSize.x = cudaBlockColumns; cudaBlockSize.y = cudaBlockRows; - const IndexType rowGrids = roundUpDivision( rowTiles, Devices::Cuda::getMaxGridSize() ); - const IndexType columnGrids = roundUpDivision( columnTiles, Devices::Cuda::getMaxGridSize() ); - const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Devices::Cuda::getNumberOfSharedMemoryBanks(); + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); + const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks(); - Dense* this_device = Devices::Cuda::passToDevice( *this ); - Matrix* matrix_device = Devices::Cuda::passToDevice( matrix ); + Dense* this_device = Cuda::passToDevice( *this ); + Matrix* matrix_device = Cuda::passToDevice( matrix ); for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) { - cudaGridSize.x = cudaGridSize.y = Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); if( gridIdx_x == columnGrids - 1) - cudaGridSize.x = columnTiles % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); if( gridIdx_y == rowGrids - 1 ) - cudaGridSize.y = rowTiles % Devices::Cuda::getMaxGridSize(); + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) && ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) ) { @@ -859,8 +859,8 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& 
matrix, } TNL_CHECK_CUDA_DEVICE; } - Devices::Cuda::freeFromDevice( this_device ); - Devices::Cuda::freeFromDevice( matrix_device ); + Cuda::freeFromDevice( this_device ); + Cuda::freeFromDevice( matrix_device ); #endif } } diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index b817372dc..b949292c5 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -54,7 +54,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp this->rows = rows; this->columns = columns; if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); else this->alignedRows = rows; if( this->rowLengths != 0 ) allocateElements(); @@ -917,7 +917,7 @@ void EllpackSymmetricGraphVectorProductCuda( const EllpackSymmetricGraph< Real, const int gridIdx, const int color ) { - int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, globalIdx, color ); } #endif @@ -966,19 +966,19 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef EllpackSymmetricGraph< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 
cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); for( IndexType color = 0; color < matrix.getNumberOfColors(); color++ ) { IndexType rows = matrix.getRowsOfColor( color ); const IndexType cudaBlocks = roundUpDivision( rows, cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); EllpackSymmetricGraphVectorProductCuda< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -989,9 +989,9 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > } } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index 5890212a4..90369f77a 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -38,7 +38,7 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row this->rows = rows; this->columns = columns; if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); else this->alignedRows = rows; if( this->rowLengths != 0 ) allocateElements(); @@ -708,7 +708,7 @@ void EllpackSymmetricVectorProductCuda( const EllpackSymmetric< Real, Devices::C OutVector* outVector, const int gridIdx ) { - int globalIdx = ( 
gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( globalIdx >= matrix->getRows() ) return; matrix->spmvCuda( *inVector, *outVector, globalIdx ); @@ -760,16 +760,16 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef EllpackSymmetric< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); EllpackSymmetricVectorProductCuda< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> @@ -778,9 +778,9 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda > kernel_outVector, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + 
Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index 2b8675c04..7651ea0d7 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -60,7 +60,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows, this->rows = rows; this->columns = columns; if( std::is_same< Device, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( rows, Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( rows, Cuda::getWarpSize() ); else this->alignedRows = rows; if( this->rowLengths != 0 ) allocateElements(); @@ -128,7 +128,7 @@ void Ellpack< Real, Device, Index >::setLike( const Ellpack< Real2, Device2, Ind Sparse< Real, Device, Index >::setLike( matrix ); this->rowLengths = matrix.rowLengths; if( std::is_same< Device, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( this->getRows(), Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( this->getRows(), Cuda::getWarpSize() ); else this->alignedRows = this->getRows(); } @@ -836,7 +836,7 @@ __global__ void EllpackVectorProductCudaKernel( Real multiplicator, const Index gridIdx ) { - const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= rows ) return; Index i = rowIdx; @@ -902,16 +902,16 @@ class EllpackDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef Ellpack< Real, Device, Index > Matrix; typedef typename Matrix::IndexType IndexType; - //Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - //InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - //OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( 
Devices::Cuda::getMaxGridSize() ); + //Matrix* kernel_this = Cuda::passToDevice( matrix ); + //InVector* kernel_inVector = Cuda::passToDevice( inVector ); + //OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); EllpackVectorProductCudaKernel < Real, Index > <<< cudaGridSize, cudaBlockSize >>> @@ -928,9 +928,9 @@ class EllpackDeviceDependentCode< Devices::Cuda > gridIdx ); TNL_CHECK_CUDA_DEVICE; } - //Devices::Cuda::freeFromDevice( kernel_this ); - //Devices::Cuda::freeFromDevice( kernel_inVector ); - //Devices::Cuda::freeFromDevice( kernel_outVector ); + //Cuda::freeFromDevice( kernel_this ); + //Cuda::freeFromDevice( kernel_inVector ); + //Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; cudaDeviceSynchronize(); #endif diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h index 07991a573..a6ede3f7b 100644 --- a/src/TNL/Matrices/MatrixOperations.h +++ b/src/TNL/Matrices/MatrixOperations.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include namespace TNL { namespace Matrices { @@ -248,7 +250,7 @@ GemvCudaKernel( const IndexType m, IndexType elementIdx = blockIdx.x * blockDim.x + threadIdx.x; const IndexType gridSize = blockDim.x * gridDim.x; - RealType* shx = Devices::Cuda::getSharedMemory< RealType >(); + RealType* shx = Cuda::getSharedMemory< RealType >(); if( threadIdx.x < n ) shx[ threadIdx.x ] = alpha * x[ threadIdx.x ]; @@ -344,10 +346,10 @@ public: 
Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); dim3 blockSize, gridSize; blockSize.x = 256; - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( m, blockSize.x ) ); GemvCudaKernel<<< gridSize, blockSize, n * sizeof( RealType ) >>>( m, n, @@ -401,9 +403,9 @@ public: blockSize.x /= 2; // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) ); - gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( m, blockSize.x ) ); + gridSize.y = Cuda::getNumberOfBlocks( n, blockSize.y ); GeamCudaKernel<<< gridSize, blockSize >>>( m, n, diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h index 7472760c2..33c4d2e65 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix_impl.h @@ -12,6 +12,9 @@ #include #include +#include +#include +#include namespace TNL { namespace Matrices { @@ -240,7 +243,7 @@ __global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, int gridIdx ) { static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x 
) * blockDim.x + threadIdx.x; + const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx < matrix->getRows() ) ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); } @@ -255,16 +258,16 @@ void MatrixVectorProductCuda( const Matrix& matrix, { #ifdef HAVE_CUDA typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_inVector, @@ -272,9 +275,9 @@ void MatrixVectorProductCuda( const Matrix& matrix, gridIdx ); TNL_CHECK_CUDA_DEVICE; } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h 
b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h index f9ef284da..bfe73f231 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h @@ -1095,7 +1095,7 @@ __global__ void SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_Cuda typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVector rowLengths, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -1152,7 +1152,7 @@ void SlicedEllpackSymmetricGraphVectorProductCuda( const SlicedEllpackSymmetricG const int color, const int sliceOffset ) { - int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x + sliceOffset; + int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x + sliceOffset; matrix->smvCuda( *inVector, *outVector, globalIdx, color ); } #endif @@ -1213,21 +1213,21 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); + Matrix* kernel_matrix = Cuda::passToDevice( matrix ); const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = 
roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_matrix ); + Cuda::freeFromDevice( kernel_matrix ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1245,10 +1245,10 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); for( IndexType color = 0; color < matrix.getNumberOfColors(); color++ ) { IndexType offset = matrix.colorPointers.getElement( color ); //can be computed in kernel @@ -1258,11 +1258,11 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > //IndexType rows = matrix.colorPointers.getElement( color + 1 ) - matrix.colorPointers.getElement( color ) + inSliceIdx; // TODO: rows id undefined /*const IndexType cudaBlocks = roundUpDivision( rows, cudaBlockSize.x ); - const IndexType cudaGrids = rondUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize ); + const IndexType cudaGrids = rondUpDivision( cudaBlocks, Cuda::getMaxGridSize ); for( 
IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); // TODO: this cannot be used here and i is undefined //IndexType offset = this->colorPointers[ i ]; IndexType inSliceIdx = offset % SliceSize; @@ -1277,9 +1277,9 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > sliceOffset ); }*/ } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index edc645688..f53efdc0c 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -512,7 +512,7 @@ const SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >* matrix, OutVector* outVector, int gridIdx ) { - int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, rowIdx ); } #endif @@ -784,7 +784,7 @@ __global__ void SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKerne typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -843,21 +843,21 @@ class 
SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); + Matrix* kernel_matrix = Cuda::passToDevice( matrix ); const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_matrix ); + Cuda::freeFromDevice( kernel_matrix ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -874,16 +874,16 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector 
); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackSymmetricVectorProductCudaKernel< Real, Index, SliceSize, InVector, OutVector > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -891,9 +891,9 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > kernel_outVector, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index c6caa5639..9f5875f17 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -876,7 +876,7 @@ __global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( Sliced typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -899,7 +899,7 @@ __global__ void SlicedEllpackVectorProductCudaKernel( Real multiplicator, const Index gridIdx ) { - const 
Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= rows ) return; const Index sliceIdx = rowIdx / SliceSize; @@ -975,21 +975,21 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); + Matrix* kernel_matrix = Cuda::passToDevice( matrix ); const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_matrix ); + Cuda::freeFromDevice( kernel_matrix ); TNL_CHECK_CUDA_DEVICE; #endif return true; @@ -1009,16 +1009,16 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::IndexType IndexType; - //Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - //InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - //OutVector* kernel_outVector = 
Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + //Matrix* kernel_this = Cuda::passToDevice( matrix ); + //InVector* kernel_inVector = Cuda::passToDevice( inVector ); + //OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackVectorProductCudaKernel < Real, Index, SliceSize > <<< cudaGridSize, cudaBlockSize >>> @@ -1035,9 +1035,9 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > gridIdx ); TNL_CHECK_CUDA_DEVICE; } - //Devices::Cuda::freeFromDevice( kernel_this ); - //Devices::Cuda::freeFromDevice( kernel_inVector ); - //Devices::Cuda::freeFromDevice( kernel_outVector ); + //Cuda::freeFromDevice( kernel_this ); + //Cuda::freeFromDevice( kernel_inVector ); + //Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; cudaDeviceSynchronize(); #endif diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h index ccc8930f9..b6d118bf2 100644 --- a/src/TNL/Matrices/SparseOperations_impl.h +++ b/src/TNL/Matrices/SparseOperations_impl.h @@ -130,8 +130,8 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) #ifdef HAVE_CUDA dim3 blockSize( 256 ); dim3 gridSize; - const IndexType desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( rows, blockSize.x ) ); + const 
IndexType desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( rows, blockSize.x ) ); typename Matrix1::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/SparseRow.h index c7ebd0703..f66cd2cea 100644 --- a/src/TNL/Matrices/SparseRow.h +++ b/src/TNL/Matrices/SparseRow.h @@ -14,7 +14,7 @@ #include #include -#include +#include namespace TNL { namespace Matrices { diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h index 78e798c2b..62575f177 100644 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ b/src/TNL/Matrices/Tridiagonal_impl.h @@ -452,7 +452,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De const Real matrixMultiplicator, const Index gridIdx ) { - const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx < inMatrix->getRows() ) { if( rowIdx > 0 ) @@ -494,24 +494,24 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Tridiagonal* kernel_this = Devices::Cuda::passToDevice( *this ); + Tridiagonal* kernel_this = Cuda::passToDevice( *this ); typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; - InMatrixType* kernel_inMatrix = Devices::Cuda::passToDevice( matrix ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const 
IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_inMatrix, kernel_this, matrixMultiplicator, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inMatrix ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Meshes/Geometry/getEntityCenter.h b/src/TNL/Meshes/Geometry/getEntityCenter.h index 59cd950ca..a37c27acf 100644 --- a/src/TNL/Meshes/Geometry/getEntityCenter.h +++ b/src/TNL/Meshes/Geometry/getEntityCenter.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Meshes/Geometry/getEntityMeasure.h b/src/TNL/Meshes/Geometry/getEntityMeasure.h index 7402e4f6d..a3381ed96 100644 --- a/src/TNL/Meshes/Geometry/getEntityMeasure.h +++ b/src/TNL/Meshes/Geometry/getEntityMeasure.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Meshes/GridDetails/GridTraverser.h b/src/TNL/Meshes/GridDetails/GridTraverser.h index 7ce106f5d..e8702153f 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser.h +++ b/src/TNL/Meshes/GridDetails/GridTraverser.h @@ -12,7 +12,6 @@ #include #include -#include namespace TNL { namespace Meshes { diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp index 533708538..796ffe491 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include @@ -120,7 +120,7 @@ GridTraverser1D( typedef Meshes::Grid< 1, Real, Devices::Cuda, Index 
> GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + coordinates.x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( coordinates <= end ) { GridEntity entity( *grid, coordinates ); @@ -182,7 +182,7 @@ processEntities( const int& stream ) { #ifdef HAVE_CUDA - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); Devices::Cuda::synchronizeDevice(); @@ -200,7 +200,7 @@ processEntities( else { dim3 blockSize( 256 ), blocksCount, gridsCount; - Devices::Cuda::setupThreads( + Cuda::setupThreads( blockSize, blocksCount, gridsCount, @@ -209,7 +209,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ ) { dim3 gridSize; - Devices::Cuda::setupGrid( + Cuda::setupGrid( blocksCount, gridsCount, gridIdx, @@ -225,8 +225,8 @@ processEntities( /*dim3 cudaBlockSize( 256 ); dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); + cudaBlocks.x = Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); + const IndexType cudaXGrids = Cuda::getNumberOfGrids( cudaBlocks.x ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp index 3efdb478f..15c5a0eda 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include @@ -148,8 +148,8 @@ GridTraverser2D( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename 
GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates <= end ) { @@ -186,7 +186,7 @@ GridTraverser2DBoundaryAlongX( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx ); coordinates.y() = fixedY; if( coordinates.x() <= endX ) @@ -222,7 +222,7 @@ GridTraverser2DBoundaryAlongY( typename GridType::CoordinatesType coordinates; coordinates.x() = fixedX; - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = beginY + Cuda::getGlobalThreadIdx_x( gridIdx ); if( coordinates.y() <= endY ) { @@ -291,10 +291,10 @@ GridTraverser2DBoundary( /*const Index aux = max( entitiesAlongX, entitiesAlongY ); - const Index& warpSize = Devices::Cuda::getWarpSize(); + const Index& warpSize = Cuda::getWarpSize(); const Index threadsPerAxis = warpSize * ( aux / warpSize + ( aux % warpSize != 0 ) ); - Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + Index threadId = Cuda::getGlobalThreadIdx_x( gridIdx ); GridEntity entity( *grid, CoordinatesType( 0, 0 ), gridEntityParameters... 
); @@ -414,10 +414,10 @@ processEntities( dim3 cudaBlockSize( 256 ); dim3 cudaBlocksCountAlongX, cudaGridsCountAlongX, cudaBlocksCountAlongY, cudaGridsCountAlongY; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); Devices::Cuda::synchronizeDevice(); const cudaStream_t& s1 = pool.getStream( stream ); @@ -425,8 +425,8 @@ processEntities( dim3 gridIdx, cudaGridSize; for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); + Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize, 0, s1 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -450,7 +450,7 @@ processEntities( const cudaStream_t& s4 = pool.getStream( stream + 3 ); for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); + Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s3 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -482,15 +482,15 @@ processEntities( const IndexType maxFaceSize = max( entitiesAlongX, entitiesAlongY ); const IndexType blocksPerFace = maxFaceSize / cudaBlockSize.x + ( maxFaceSize % cudaBlockSize.x != 0 ); IndexType cudaThreadsCount = 4 * cudaBlockSize.x * blocksPerFace; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); //std::cerr << "blocksPerFace = " << blocksPerFace << "Threads count = " << cudaThreadsCount // << "cudaBlockCount = " << cudaBlocksCount.x << std::endl; dim3 gridIdx, cudaGridSize; Devices::Cuda::synchronizeDevice(); for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -511,11 +511,11 @@ processEntities( { dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocksCount, cudaGridsCount; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, - end.x() - begin.x() + 1, - end.y() - begin.y() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, + end.x() - begin.x() + 1, + end.y() - begin.y() + 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); Devices::Cuda::synchronizeDevice(); @@ -523,8 +523,8 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp index 24200c15d..489e58d77 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include @@ -177,9 +177,9 @@ GridTraverser3D( typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); - coordinates.z() = begin.z() + Devices::Cuda::getGlobalThreadIdx_z( gridIdx ); + coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.z() = begin.z() + Cuda::getGlobalThreadIdx_z( gridIdx ); if( coordinates <= end ) { @@ -217,8 +217,8 @@ GridTraverser3DBoundaryAlongXY( typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = beginY + Cuda::getGlobalThreadIdx_y( gridIdx ); coordinates.z() = fixedZ; if( coordinates.x() <= endX && coordinates.y() <= endY ) @@ -254,9 +254,9 @@ GridTraverser3DBoundaryAlongXZ( typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx ); coordinates.y() = fixedY; - coordinates.z() = beginZ + Devices::Cuda::getGlobalThreadIdx_y( 
gridIdx ); + coordinates.z() = beginZ + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates.x() <= endX && coordinates.z() <= endZ ) { @@ -292,8 +292,8 @@ GridTraverser3DBoundaryAlongYZ( typename GridType::CoordinatesType coordinates; coordinates.x() = fixedX; - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.z() = beginZ + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.y() = beginY + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.z() = beginZ + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates.y() <= endY && coordinates.z() <= endZ ) { @@ -341,11 +341,11 @@ processEntities( dim3 cudaBlocksCountAlongXY, cudaBlocksCountAlongXZ, cudaBlocksCountAlongYZ, cudaGridsCountAlongXY, cudaGridsCountAlongXZ, cudaGridsCountAlongYZ; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXY, cudaGridsCountAlongXY, entitiesAlongX, entitiesAlongY ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, entitiesAlongX, entitiesAlongZ - 2 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXY, cudaGridsCountAlongXY, entitiesAlongX, entitiesAlongY ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, entitiesAlongX, entitiesAlongZ - 2 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); Devices::Cuda::synchronizeDevice(); const cudaStream_t& s1 = pool.getStream( stream ); @@ -359,7 +359,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongXY.y; gridIdx.y++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongXY.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongXY, cudaGridsCountAlongXY, 
gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCountAlongXY, cudaGridsCountAlongXY, gridIdx, gridSize ); GridTraverser3DBoundaryAlongXY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaBlocksCountAlongXY, cudaBlockSize, 0 , s1 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -386,7 +386,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongXZ.y; gridIdx.y++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongXZ.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, gridIdx, gridSize ); GridTraverser3DBoundaryAlongXZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaBlocksCountAlongXZ, cudaBlockSize, 0, s3 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -413,7 +413,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongYZ.y; gridIdx.y++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongYZ.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, gridIdx, gridSize ); GridTraverser3DBoundaryAlongYZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaBlocksCountAlongYZ, cudaBlockSize, 0, s5 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -450,12 +450,12 @@ processEntities( dim3 cudaBlockSize( 8, 8, 8 ); dim3 cudaBlocksCount, cudaGridsCount; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, - end.x() - begin.x() + 1, - end.y() - begin.y() + 1, - end.z() - begin.z() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, + end.x() - begin.x() + 1, + end.y() - begin.y() + 1, + end.z() - begin.z() + 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); Devices::Cuda::synchronizeDevice(); @@ -464,7 +464,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, gridSize ); GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< gridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h index dd9562add..29fe8ffd6 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h index 84a9c56d9..f7a3cc180 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include #include namespace TNL { diff --git a/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h b/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h index 64485dc3c..110fa9eef 100644 --- a/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h +++ b/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h @@ -17,7 +17,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Meshes { diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h index 5dedf58fd..27cbba714 100644 --- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h +++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h @@ -13,6 +13,8 @@ #include #include +#include +#include namespace TNL { namespace Meshes { @@ -159,8 +161,8 @@ processBoundaryEntities( const MeshPointer& meshPointer, dim3 blockSize( 256 ); dim3 gridSize; - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( 
entitiesCount, blockSize.x ) ); Devices::Cuda::synchronizeDevice(); MeshTraverserBoundaryEntitiesKernel< EntitiesDimension, EntitiesProcessor > @@ -190,8 +192,8 @@ processInteriorEntities( const MeshPointer& meshPointer, dim3 blockSize( 256 ); dim3 gridSize; - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); Devices::Cuda::synchronizeDevice(); MeshTraverserInteriorEntitiesKernel< EntitiesDimension, EntitiesProcessor > @@ -221,8 +223,8 @@ processAllEntities( const MeshPointer& meshPointer, dim3 blockSize( 256 ); dim3 gridSize; - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); Devices::Cuda::synchronizeDevice(); MeshTraverserAllEntitiesKernel< EntitiesDimension, EntitiesProcessor > diff --git a/src/TNL/Object.h b/src/TNL/Object.h index ba4151095..ff7432635 100644 --- a/src/TNL/Object.h +++ b/src/TNL/Object.h @@ -12,7 +12,6 @@ #include -#include #include #include diff --git a/src/TNL/ParallelFor.h b/src/TNL/ParallelFor.h index 04af27408..cc9ce7080 100644 --- a/src/TNL/ParallelFor.h +++ b/src/TNL/ParallelFor.h @@ -12,7 +12,9 @@ #include #include -#include +#include +#include +#include #include /**** @@ -203,14 +205,14 @@ struct ParallelFor< Devices::Cuda, Mode > if( end > start ) { dim3 blockSize( 256 ); dim3 gridSize; - 
gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); + gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); if( (std::size_t) blockSize.x * gridSize.x >= (std::size_t) end - start ) ParallelForKernel< false ><<< gridSize, blockSize >>>( start, end, f, args... ); else { // decrease the grid size and align to the number of multiprocessors - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = TNL::min( desGridSize, Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = TNL::min( desGridSize, Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); ParallelForKernel< true ><<< gridSize, blockSize >>>( start, end, f, args... ); } @@ -253,8 +255,8 @@ struct ParallelFor2D< Devices::Cuda, Mode > blockSize.y = TNL::min( 8, sizeY ); } dim3 gridSize; - gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); - gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); + gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); + gridSize.y = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); dim3 gridCount; gridCount.x = roundUpDivision( sizeX, blockSize.x * gridSize.x ); @@ -337,9 +339,9 @@ struct ParallelFor3D< Devices::Cuda, Mode > blockSize.z = TNL::min( 4, sizeZ ); } dim3 gridSize; - gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); - gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); - gridSize.z = TNL::min( 
Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) ); + gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); + gridSize.y = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); + gridSize.z = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) ); dim3 gridCount; gridCount.x = roundUpDivision( sizeX, blockSize.x * gridSize.x ); diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index 136c809cd..d3bf9b07f 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -17,6 +17,7 @@ #include #include #include +#include #include // std::memcpy, std::memcmp @@ -422,7 +423,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer this->pointer = &obj; this->pd = new PointerData(); // pass to device - this->cuda_pointer = Devices::Cuda::passToDevice( *this->pointer ); + this->cuda_pointer = Cuda::passToDevice( *this->pointer ); // set last-sync state this->set_last_sync_state(); Devices::Cuda::insertSmartPointer( this ); @@ -456,7 +457,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer delete this->pd; this->pd = nullptr; if( this->cuda_pointer ) - Devices::Cuda::freeFromDevice( this->cuda_pointer ); + Cuda::freeFromDevice( this->cuda_pointer ); } } } diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h index 9c883c23a..27a4aabef 100644 --- a/src/TNL/Pointers/SharedPointerCuda.h +++ b/src/TNL/Pointers/SharedPointerCuda.h @@ -16,6 +16,7 @@ #include #include +#include #include // std::memcpy, std::memcmp #include // std::nullptr_t @@ -570,7 +571,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer { this->pd = new PointerData( args... 
); // pass to device - this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data ); + this->cuda_pointer = Cuda::passToDevice( this->pd->data ); // set last-sync state this->set_last_sync_state(); #ifdef TNL_DEBUG_SHARED_POINTERS @@ -608,7 +609,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer delete this->pd; this->pd = nullptr; if( this->cuda_pointer ) - Devices::Cuda::freeFromDevice( this->cuda_pointer ); + Cuda::freeFromDevice( this->cuda_pointer ); #ifdef TNL_DEBUG_SHARED_POINTERS std::cerr << "...deleted data." << std::endl; #endif diff --git a/src/TNL/Pointers/SharedPointerHost.h b/src/TNL/Pointers/SharedPointerHost.h index 087cfd79e..39a6d4da4 100644 --- a/src/TNL/Pointers/SharedPointerHost.h +++ b/src/TNL/Pointers/SharedPointerHost.h @@ -15,7 +15,7 @@ #include "SharedPointer.h" #include -#include +#include #include #include // std::nullptr_t diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index 53f2dac5b..9a7a2b677 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -16,6 +16,7 @@ #include #include #include +#include #include // std::memcpy, std::memcmp #include // std::nullptr_t @@ -272,7 +273,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer { this->pd = new PointerData( args... 
); // pass to device - this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data ); + this->cuda_pointer = Cuda::passToDevice( this->pd->data ); // set last-sync state this->set_last_sync_state(); Devices::Cuda::insertSmartPointer( this ); @@ -300,7 +301,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer if( this->pd ) delete this->pd; if( this->cuda_pointer ) - Devices::Cuda::freeFromDevice( this->cuda_pointer ); + Cuda::freeFromDevice( this->cuda_pointer ); } PointerData* pd; diff --git a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h index a28a64cf5..d1b871c25 100644 --- a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h +++ b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h @@ -11,7 +11,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/StaticFor.h b/src/TNL/StaticFor.h index 990036dfc..c37763aaa 100644 --- a/src/TNL/StaticFor.h +++ b/src/TNL/StaticFor.h @@ -10,7 +10,7 @@ #pragma once -#include +#include namespace TNL { diff --git a/src/TNL/TemplateStaticFor.h b/src/TNL/TemplateStaticFor.h index 88ad764fd..efd9d1ad9 100644 --- a/src/TNL/TemplateStaticFor.h +++ b/src/TNL/TemplateStaticFor.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace TNL { namespace detail { diff --git a/src/UnitTests/AssertCudaTest.cu b/src/UnitTests/AssertCudaTest.cu index 9d4865eb9..8f42da677 100644 --- a/src/UnitTests/AssertCudaTest.cu +++ b/src/UnitTests/AssertCudaTest.cu @@ -13,7 +13,7 @@ #endif #include -#include +#include #include #ifdef HAVE_GTEST diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index 25c7fda49..69dd2d252 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -15,6 +15,7 @@ #include #include +#include #include "gtest/gtest.h" @@ -312,9 +313,9 @@ void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u ) #ifdef HAVE_CUDA u.setSize( 10 ); using 
ArrayType = Array< Value, Devices::Cuda, Index >; - ArrayType* kernel_u = Devices::Cuda::passToDevice( u ); - testSetGetElementKernel<<< 1, 16 >>>( kernel_u ); - Devices::Cuda::freeFromDevice( kernel_u ); + Pointers::DevicePointer< ArrayType > kernel_u( u ); + testSetGetElementKernel<<< 1, 16 >>>( &kernel_u.template modifyData< Devices::Cuda >() ); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; for( int i = 0; i < 10; i++ ) { EXPECT_EQ( u.getElement( i ), i ); -- GitLab From 1743358a01404a575b09b06474e4ef8fb8bb9ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 21 Aug 2019 19:50:39 +0200 Subject: [PATCH 10/35] Moved synchronization of smart pointers from Devices::Cuda into TNL::Pointers namespace as free functions synchronizeDevice() was renamed to synchronizeSmartPointersOnDevice() for clarity - there are many similarly named functions in CUDA (e.g. cudaDeviceSynchronize()). --- .../HeatEquationBenchmarkProblem_impl.h | 4 +- .../HeatEquation/Tuning/GridTraverser_impl.h | 4 +- .../tnl-benchmark-linear-solvers.h | 2 +- src/TNL/Devices/Cuda.h | 21 ------ src/TNL/Devices/Cuda_impl.h | 43 ++----------- .../tnlDirectEikonalMethodBase1D_impl.h | 2 +- .../tnlDirectEikonalMethodBase2D_impl.h | 2 +- .../tnlDirectEikonalMethodBase3D_impl.h | 2 +- .../tnlFastSweepingMethod2D_impl.h | 6 +- .../tnlFastSweepingMethod3D_impl.h | 10 +-- src/TNL/Matrices/SparseOperations_impl.h | 4 +- .../Meshes/GridDetails/GridTraverser_1D.hpp | 2 +- .../Meshes/GridDetails/GridTraverser_2D.hpp | 8 +-- .../Meshes/GridDetails/GridTraverser_3D.hpp | 6 +- src/TNL/Meshes/MeshDetails/Traverser_impl.h | 6 +- src/TNL/Pointers/DevicePointer.h | 5 +- src/TNL/Pointers/SharedPointerCuda.h | 5 +- src/TNL/Pointers/SmartPointersRegister.h | 64 +++++++++++++++++-- src/TNL/Pointers/UniquePointer.h | 5 +- .../Linear/Preconditioners/ILU0_impl.h | 4 +- src/TNL/Solvers/SolverStarter_impl.h | 2 +- .../Pointers/SharedPointerCudaTest.cu | 4 +- 22 files changed, 107 insertions(+), 104 
deletions(-) diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h index e3f472923..3f0c91948 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h @@ -490,7 +490,7 @@ getExplicitUpdate( const RealType& time, //std::cerr << "Setting boundary conditions..." << std::endl; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) boundaryConditionsTemplatedCompact< MeshType, CellType, BoundaryCondition, MeshFunctionType > @@ -594,7 +594,7 @@ getExplicitUpdate( const RealType& time, gridYSize / 16 + ( gridYSize % 16 != 0 ) ); */ - TNL::Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); int cudaErr; Meshes::Traverser< MeshType, Cell > meshTraverser; meshTraverser.template processInteriorEntities< UserData, diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h index 2a77f8bb5..c9fe0e43b 100644 --- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h +++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h @@ -246,7 +246,7 @@ processEntities( IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 ); Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); dim3 gridIdx, cudaGridSize; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); @@ -273,7 +273,7 @@ processEntities( auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = 
pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); dim3 gridIdx, cudaGridSize; for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 6661c5f6a..ffb2f121a 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -130,7 +130,7 @@ benchmarkIterativeSolvers( Benchmark& benchmark, *cudaMatrixPointer = *matrixPointer; // synchronize shared pointers - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); #endif using namespace Solvers::Linear; diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 853cd2e03..fc924ccc9 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -10,13 +10,7 @@ #pragma once -#include - #include -#include -#include -#include -#include #include #include @@ -33,16 +27,6 @@ public: static inline constexpr int getGPUTransferBufferSize(); - static inline void insertSmartPointer( Pointers::SmartPointer* pointer ); - - static inline void removeSmartPointer( Pointers::SmartPointer* pointer ); - - // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be - // called to get the device ID. - static inline bool synchronizeDevice( int deviceId = -1 ); - - static inline Timer& getSmartPointersSynchronizationTimer(); - //// // When we transfer data between the GPU and the CPU we use 5 MB buffer. This // size should ensure good performance -- see. @@ -50,11 +34,6 @@ public: // We use the same buffer size even for retyping data during IO operations. 
// static constexpr std::size_t TransferBufferSize = 5 * 2<<20; - - - protected: - - static inline Pointers::SmartPointersRegister& getSmartPointersRegister(); }; #ifdef HAVE_CUDA diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index 6d3daa356..7a4d59fcc 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -10,12 +10,15 @@ #pragma once +#include + #include #include #include #include #include #include +#include namespace TNL { namespace Devices { @@ -42,8 +45,8 @@ Cuda::setup( const Config::ParameterContainer& parameters, std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl; return false; } - getSmartPointersSynchronizationTimer().reset(); - getSmartPointersSynchronizationTimer().stop(); + Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().reset(); + Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().stop(); #endif return true; } @@ -53,42 +56,6 @@ inline constexpr int Cuda::getGPUTransferBufferSize() return 1 << 20; } -inline void Cuda::insertSmartPointer( Pointers::SmartPointer* pointer ) -{ - getSmartPointersRegister().insert( pointer, TNL::Cuda::DeviceInfo::getActiveDevice() ); -} - -inline void Cuda::removeSmartPointer( Pointers::SmartPointer* pointer ) -{ - getSmartPointersRegister().remove( pointer, TNL::Cuda::DeviceInfo::getActiveDevice() ); -} - -inline bool Cuda::synchronizeDevice( int deviceId ) -{ -#ifdef HAVE_CUDA - if( deviceId < 0 ) - deviceId = TNL::Cuda::DeviceInfo::getActiveDevice(); - getSmartPointersSynchronizationTimer().start(); - bool b = getSmartPointersRegister().synchronizeDevice( deviceId ); - getSmartPointersSynchronizationTimer().stop(); - return b; -#else - return true; -#endif -} - -inline Timer& Cuda::getSmartPointersSynchronizationTimer() -{ - static Timer timer; - return timer; -} - -inline Pointers::SmartPointersRegister& Cuda::getSmartPointersRegister() -{ - static Pointers::SmartPointersRegister reg; - return reg; -} - 
// double-precision atomicAdd function for Maxwell and older GPUs // copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions #ifdef HAVE_CUDA diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h index 4dac64a23..49cda643c 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h @@ -25,7 +25,7 @@ initInterface( const MeshFunctionPointer& _input, int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >() ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h index 947a4be06..b18252cb0 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h @@ -29,7 +29,7 @@ initInterface( const MeshFunctionPointer& _input, int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), 
_output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >(), diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h index eb0665c7e..fd7dc9381 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h @@ -30,7 +30,7 @@ initInterface( const MeshFunctionPointer& _input, std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >(), vLower, vUpper ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index c5a0f74cc..1b1666a02 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -316,7 +316,7 @@ solve( const MeshPointer& mesh, /** HERE IS FIM FOR MPI AND WITHOUT MPI **/ - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), blockCalculationIndicator.getView(), 
vecLowerOverlaps, vecUpperOverlaps ); @@ -327,7 +327,7 @@ solve( const MeshPointer& mesh, auxPtr.swap( helpFunc ); // Getting blocks that should calculate in next passage. These blocks are neighbours of those that were calculated now. - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); GetNeighbours<<< nBlocksNeigh, 1024 >>>( blockCalculationIndicator.getView(), blockCalculationIndicatorHelp.getView(), numBlocksX, numBlocksY ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; @@ -349,7 +349,7 @@ solve( const MeshPointer& mesh, if( numIter%2 == 1 ) // Need to check parity for MPI overlaps to synchronize ( otherwise doesnt work ) { helpFunc.swap( auxPtr ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; } diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 3fce5564e..82185a937 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -295,14 +295,14 @@ solve( const MeshPointer& mesh, //MeshFunctionPointer helpFunc1( mesh ); MeshFunctionPointer helpFunc( mesh ); helpFunc.template modifyData() = auxPtr.template getData(); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); int numIter = 0; // number of passages of following while cycle while( BlockIterD ) //main body of cuda code { - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); // main function that calculates all values in each blocks // calculated values are in helpFunc CudaUpdateCellCaller< 10 ><<< gridSize, blockSize >>>( ptr, @@ -315,14 +315,14 @@ solve( const 
MeshPointer& mesh, // Switching pointers to helpFunc and auxPtr so real results are in memory of helpFunc but here under variable auxPtr auxPtr.swap( helpFunc ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); // Neighbours of blocks that calculatedBefore in this passage should calculate in the next! // BlockIterDevice contains blocks that calculatedBefore in this passage and BlockIterPom those that should calculate in next (are neighbours) GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; BlockIterDevice = BlockIterPom; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); // .containsValue(1) is actually parallel reduction implemented in TNL BlockIterD = BlockIterDevice.containsValue(1); @@ -340,7 +340,7 @@ solve( const MeshPointer& mesh, // We need auxPtr to point on memory of original auxPtr (not to helpFunc) // last passage of previous while cycle didnt calculate any number anyway so switching names doesnt effect values auxPtr.swap( helpFunc ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); } cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h index b6d118bf2..ce7caaf32 100644 --- a/src/TNL/Matrices/SparseOperations_impl.h +++ b/src/TNL/Matrices/SparseOperations_impl.h @@ -140,7 +140,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) const Pointers::DevicePointer< const Matrix2 > Bpointer( B ); // set row lengths - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); SparseMatrixSetRowLengthsVectorKernel<<< gridSize, blockSize >>>( rowLengths.getData(), &Bpointer.template getData< TNL::Devices::Cuda >(), @@ -150,7 +150,7 @@ 
copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) Apointer->setCompressedRowLengths( rowLengths ); // copy rows - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); SparseMatrixCopyKernel<<< gridSize, blockSize >>>( &Apointer.template modifyData< TNL::Devices::Cuda >(), &Bpointer.template getData< TNL::Devices::Cuda >(), diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp index 796ffe491..c1aab9660 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp @@ -185,7 +185,7 @@ processEntities( auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); if( processOnlyBoundaryEntities ) { dim3 cudaBlockSize( 2 ); diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp index 15c5a0eda..721ec96d2 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp @@ -418,8 +418,8 @@ processEntities( Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 ); auto& pool = Cuda::StreamPool::getInstance(); - Devices::Cuda::synchronizeDevice(); - + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + const cudaStream_t& s1 = pool.getStream( stream ); const cudaStream_t& s2 = pool.getStream( stream + 1 ); dim3 gridIdx, cudaGridSize; @@ -486,7 +486,7 @@ processEntities( //std::cerr << "blocksPerFace = " << blocksPerFace << "Threads count = " << cudaThreadsCount // << "cudaBlockCount = " << cudaBlocksCount.x << std::endl; dim3 gridIdx, cudaGridSize; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { 
Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); @@ -518,7 +518,7 @@ processEntities( auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); dim3 gridIdx, cudaGridSize; for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp index 489e58d77..a9aad8c95 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp @@ -346,8 +346,8 @@ processEntities( Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 ); auto& pool = Cuda::StreamPool::getInstance(); - Devices::Cuda::synchronizeDevice(); - + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + const cudaStream_t& s1 = pool.getStream( stream ); const cudaStream_t& s2 = pool.getStream( stream + 1 ); const cudaStream_t& s3 = pool.getStream( stream + 2 ); @@ -458,7 +458,7 @@ processEntities( auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); dim3 gridIdx, gridSize; for( gridIdx.z = 0; gridIdx.z < cudaGridsCount.z; gridIdx.z ++ ) for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h index 27cbba714..33832d4f1 100644 --- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h +++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h @@ -164,7 +164,7 @@ processBoundaryEntities( const MeshPointer& meshPointer, const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( 
Cuda::DeviceInfo::getActiveDevice() ); gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); MeshTraverserBoundaryEntitiesKernel< EntitiesDimension, EntitiesProcessor > <<< gridSize, blockSize >>> ( &meshPointer.template getData< Devices::Cuda >(), @@ -195,7 +195,7 @@ processInteriorEntities( const MeshPointer& meshPointer, const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); MeshTraverserInteriorEntitiesKernel< EntitiesDimension, EntitiesProcessor > <<< gridSize, blockSize >>> ( &meshPointer.template getData< Devices::Cuda >(), @@ -226,7 +226,7 @@ processAllEntities( const MeshPointer& meshPointer, const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); MeshTraverserAllEntitiesKernel< EntitiesDimension, EntitiesProcessor > <<< gridSize, blockSize >>> ( &meshPointer.template getData< Devices::Cuda >(), diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index d3bf9b07f..5276c3ed4 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -406,7 +407,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer ~DevicePointer() { this->free(); - Devices::Cuda::removeSmartPointer( this ); + getSmartPointersRegister< DeviceType >().remove( this ); } protected: @@ -426,7 +427,7 @@ class DevicePointer< Object, 
Devices::Cuda > : public SmartPointer this->cuda_pointer = Cuda::passToDevice( *this->pointer ); // set last-sync state this->set_last_sync_state(); - Devices::Cuda::insertSmartPointer( this ); + getSmartPointersRegister< DeviceType >().insert( this ); return true; } diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h index 27a4aabef..54dd4ee3c 100644 --- a/src/TNL/Pointers/SharedPointerCuda.h +++ b/src/TNL/Pointers/SharedPointerCuda.h @@ -16,6 +16,7 @@ #include #include +#include #include #include // std::memcpy, std::memcmp @@ -546,7 +547,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer ~SharedPointer() { this->free(); - Devices::Cuda::removeSmartPointer( this ); + getSmartPointersRegister< DeviceType >().remove( this ); } protected: @@ -577,7 +578,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer #ifdef TNL_DEBUG_SHARED_POINTERS std::cerr << "Created shared pointer to " << getType< ObjectType >() << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; #endif - Devices::Cuda::insertSmartPointer( this ); + getSmartPointersRegister< DeviceType >().insert( this ); return true; } diff --git a/src/TNL/Pointers/SmartPointersRegister.h b/src/TNL/Pointers/SmartPointersRegister.h index ad716b9c0..5094c1c0e 100644 --- a/src/TNL/Pointers/SmartPointersRegister.h +++ b/src/TNL/Pointers/SmartPointersRegister.h @@ -2,7 +2,7 @@ SmartPointersRegister.h - description ------------------- begin : Apr 29, 2016 - copyright : (C) 2016 by Tomas Oberhuber + copyright : (C) 2016 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -12,24 +12,43 @@ #include #include + #include -#include +#include +#include namespace TNL { namespace Pointers { +// Since TNL currently supports only execution on host (which does not need +// to register and synchronize smart pointers) and CUDA GPU's, the smart +// pointers register is implemented only for CUDA. If more execution types +// which need to register smart pointers are implemented in the future, this +// should become a class template specialization. class SmartPointersRegister { public: - void insert( SmartPointer* pointer, int deviceId ) + /** + * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be + * called to get the device ID. + */ + void insert( SmartPointer* pointer, int deviceId = -1 ) { + if( deviceId < 0 ) + deviceId = Cuda::DeviceInfo::getActiveDevice(); pointersOnDevices[ deviceId ].insert( pointer ); } - void remove( SmartPointer* pointer, int deviceId ) + /** + * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be + * called to get the device ID. + */ + void remove( SmartPointer* pointer, int deviceId = -1 ) { + if( deviceId < 0 ) + deviceId = Cuda::DeviceInfo::getActiveDevice(); try { pointersOnDevices.at( deviceId ).erase( pointer ); } @@ -41,8 +60,14 @@ class SmartPointersRegister } } - bool synchronizeDevice( int deviceId ) + /** + * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be + * called to get the device ID. 
+ */ + bool synchronizeDevice( int deviceId = -1 ) { + if( deviceId < 0 ) + deviceId = Cuda::DeviceInfo::getActiveDevice(); try { const auto & set = pointersOnDevices.at( deviceId ); for( auto&& it : set ) @@ -61,5 +86,34 @@ class SmartPointersRegister std::unordered_map< int, SetType > pointersOnDevices; }; + +// TODO: Device -> Allocator (in all smart pointers) +template< typename Device > +SmartPointersRegister& getSmartPointersRegister() +{ + static SmartPointersRegister reg; + return reg; +} + +template< typename Device > +Timer& getSmartPointersSynchronizationTimer() +{ + static Timer timer; + return timer; +} + +/** + * Negative deviceId means that the ID of the currently active device will be + * determined automatically. + */ +template< typename Device > +bool synchronizeSmartPointersOnDevice( int deviceId = -1 ) +{ + getSmartPointersSynchronizationTimer< Device >().start(); + bool b = getSmartPointersRegister< Device >().synchronizeDevice( deviceId ); + getSmartPointersSynchronizationTimer< Device >().stop(); + return b; +} + } // namespace Pointers } // namespace TNL diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index 9a7a2b677..071de4d51 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include // std::memcpy, std::memcmp @@ -250,7 +251,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer ~UniquePointer() { this->free(); - Devices::Cuda::removeSmartPointer( this ); + getSmartPointersRegister< DeviceType >().remove( this ); } protected: @@ -276,7 +277,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer this->cuda_pointer = Cuda::passToDevice( this->pd->data ); // set last-sync state this->set_last_sync_state(); - Devices::Cuda::insertSmartPointer( this ); + getSmartPointersRegister< DeviceType >().insert( this ); return true; } diff --git 
a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h index 626469920..be9e37f23 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h @@ -282,7 +282,7 @@ allocate_LU() U->setDimensions( N, N ); // extract raw pointer - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); const CSR* kernel_A = &A.template getData< DeviceType >(); // copy row lengths @@ -329,7 +329,7 @@ copy_triangular_factors() const int N = A->getRows(); // extract raw pointers - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CSR* kernel_L = &L.template modifyData< DeviceType >(); CSR* kernel_U = &U.template modifyData< DeviceType >(); const CSR* kernel_A = &A.template getData< DeviceType >(); diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h index e52d03a4f..8b323d5d7 100644 --- a/src/TNL/Solvers/SolverStarter_impl.h +++ b/src/TNL/Solvers/SolverStarter_impl.h @@ -406,7 +406,7 @@ bool SolverStarter< ConfigTag > :: writeEpilog( std::ostream& str, const Solver& if( std::is_same< typename Solver::DeviceType, TNL::Devices::Cuda >::value ) { logger.writeParameter< const char* >( "GPU synchronization time:", "" ); - TNL::Devices::Cuda::getSmartPointersSynchronizationTimer().writeLog( logger, 1 ); + Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().writeLog( logger, 1 ); } logger.writeParameter< const char* >( "I/O time:", "" ); this->ioTimer.writeLog( logger, 1 ); diff --git a/src/UnitTests/Pointers/SharedPointerCudaTest.cu b/src/UnitTests/Pointers/SharedPointerCudaTest.cu index c0d76b2cc..83b6b4793 100644 --- a/src/UnitTests/Pointers/SharedPointerCudaTest.cu +++ b/src/UnitTests/Pointers/SharedPointerCudaTest.cu @@ -55,7 +55,7 @@ TEST( SharedPointerCudaTest, getDataTest ) ASSERT_EQ( ptr1->y(), 2 ); #else - 
Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); TestType aux; @@ -89,7 +89,7 @@ TEST( SharedPointerCudaTest, getDataArrayTest ) ptr->setElement( 0, 1 ); ptr->setElement( 1, 2 ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); int *testArray_device, *testArray_host; cudaMalloc( ( void** ) &testArray_device, 2 * sizeof( int ) ); -- GitLab From 15b5e2c40d9c1a68518c0f4408bb4ecdd1b56a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 22 Aug 2019 19:31:20 +0200 Subject: [PATCH 11/35] Moved atomicAdd function from Devices/Cuda.h into Atomic.h --- src/TNL/Atomic.h | 29 +++++++++++++++++++++++++++-- src/TNL/Devices/Cuda.h | 8 -------- src/TNL/Devices/Cuda_impl.h | 26 -------------------------- 3 files changed, 27 insertions(+), 36 deletions(-) diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h index 4855b8f90..e84236287 100644 --- a/src/TNL/Atomic.h +++ b/src/TNL/Atomic.h @@ -17,11 +17,36 @@ #include #include +// double-precision atomicAdd function for Maxwell and older GPUs +// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions +#ifdef HAVE_CUDA +#if __CUDA_ARCH__ < 600 +namespace { + __device__ double atomicAdd(double* address, double val) + { + unsigned long long int* address_as_ull = + (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); + } +} // namespace +#endif +#endif + namespace TNL { template< typename T, typename Device > -class Atomic -{}; +class Atomic; template< typename T > class Atomic< T, Devices::Host > diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h 
index fc924ccc9..6784da34d 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -36,14 +36,6 @@ public: static constexpr std::size_t TransferBufferSize = 5 * 2<<20; }; -#ifdef HAVE_CUDA -#if __CUDA_ARCH__ < 600 -namespace { - __device__ double atomicAdd(double* address, double val); -} -#endif -#endif - } // namespace Devices } // namespace TNL diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index 7a4d59fcc..5109f689e 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -56,31 +56,5 @@ inline constexpr int Cuda::getGPUTransferBufferSize() return 1 << 20; } -// double-precision atomicAdd function for Maxwell and older GPUs -// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions -#ifdef HAVE_CUDA -#if __CUDA_ARCH__ < 600 -namespace { - __device__ double atomicAdd(double* address, double val) - { - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + - __longlong_as_double(assumed))); - - // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) - } while (assumed != old); - - return __longlong_as_double(old); - } -} // namespace -#endif -#endif - } // namespace Devices } // namespace TNL -- GitLab From a1a054bf877e5ed7f879a9f4786b13e6a412b234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 22 Aug 2019 19:55:27 +0200 Subject: [PATCH 12/35] Removed duplicate TransferBufferSize constants Also set the buffer size to 1 MiB, because larger buffer size slows down memory copies significantly (e.g. MeshTest would take about 10x longer). 
Addresses #26 --- .../Algorithms/ArrayOperationsCuda.hpp | 18 +++++++++--------- src/TNL/Cuda/LaunchHelpers.h | 8 ++++++++ src/TNL/Devices/Cuda.h | 10 ---------- src/TNL/Devices/Cuda_impl.h | 5 ----- src/TNL/File.h | 8 -------- src/TNL/File.hpp | 13 +++++++------ 6 files changed, 24 insertions(+), 38 deletions(-) diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp index b81fd7f2b..5e97f1ac2 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp @@ -106,11 +106,11 @@ copyFromIterator( DestinationElement* destination, SourceIterator last ) { using BaseType = typename std::remove_cv< DestinationElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] }; + std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; Index copiedElements = 0; while( copiedElements < destinationSize && first != last ) { Index i = 0; - while( i < Devices::Cuda::getGPUTransferBufferSize() && first != last ) + while( i < Cuda::getTransferBufferSize() && first != last ) buffer[ i++ ] = *first++; ArrayOperations< Devices::Cuda, Devices::Host >::copy( &destination[ copiedElements ], buffer.get(), i ); copiedElements += i; @@ -197,18 +197,18 @@ copy( DestinationElement* destination, else { using BaseType = typename std::remove_cv< SourceElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] }; + std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; Index i( 0 ); while( i < size ) { if( cudaMemcpy( (void*) buffer.get(), (void*) &source[ i ], - TNL::min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), + TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of 
data from CUDA device to host failed." << std::endl; TNL_CHECK_CUDA_DEVICE; Index j( 0 ); - while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) + while( j < Cuda::getTransferBufferSize() && i + j < size ) { destination[ i + j ] = buffer[ j ]; j++; @@ -239,11 +239,11 @@ compare( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); #ifdef HAVE_CUDA - std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] }; + std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] }; Index compared( 0 ); while( compared < size ) { - Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); + Index transfer = min( size - compared, Cuda::getTransferBufferSize() ); if( cudaMemcpy( (void*) host_buffer.get(), (void*) &source[ compared ], transfer * sizeof( Element2 ), @@ -288,12 +288,12 @@ copy( DestinationElement* destination, } else { - std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; + std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] }; Index i( 0 ); while( i < size ) { Index j( 0 ); - while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) + while( j < Cuda::getTransferBufferSize() && i + j < size ) { buffer[ j ] = source[ i + j ]; j++; diff --git a/src/TNL/Cuda/LaunchHelpers.h b/src/TNL/Cuda/LaunchHelpers.h index aaca4a67d..6e5d3c975 100644 --- a/src/TNL/Cuda/LaunchHelpers.h +++ b/src/TNL/Cuda/LaunchHelpers.h @@ -30,6 +30,14 @@ inline constexpr int getWarpSize() return 32; } +// When we transfer data between the GPU and the CPU we use 1 MiB buffer. This +// size should ensure good performance. +// We use the same buffer size even for retyping data during IO operations. 
+inline constexpr int getTransferBufferSize() +{ + return 1 << 20; +} + #ifdef HAVE_CUDA __device__ inline int getGlobalThreadIdx( const int gridIdx = 0, const int gridSize = getMaxGridSize() ) diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 6784da34d..e1dd264b4 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -24,16 +24,6 @@ public: static inline bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); - - static inline constexpr int getGPUTransferBufferSize(); - - //// - // When we transfer data between the GPU and the CPU we use 5 MB buffer. This - // size should ensure good performance -- see. - // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer . - // We use the same buffer size even for retyping data during IO operations. - // - static constexpr std::size_t TransferBufferSize = 5 * 2<<20; }; } // namespace Devices diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h index 5109f689e..ae6bbcd17 100644 --- a/src/TNL/Devices/Cuda_impl.h +++ b/src/TNL/Devices/Cuda_impl.h @@ -51,10 +51,5 @@ Cuda::setup( const Config::ParameterContainer& parameters, return true; } -inline constexpr int Cuda::getGPUTransferBufferSize() -{ - return 1 << 20; -} - } // namespace Devices } // namespace TNL diff --git a/src/TNL/File.h b/src/TNL/File.h index 70eb013b7..747f4f4e7 100644 --- a/src/TNL/File.h +++ b/src/TNL/File.h @@ -168,14 +168,6 @@ class File std::fstream file; String fileName; - - //// - // When we transfer data between the GPU and the CPU we use 5 MB buffer. This - // size should ensure good performance -- see. - // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer . - // We use the same buffer size even for retyping data during IO operations. 
- // - static constexpr std::streamsize TransferBufferSize = 5 * 2<<20; }; /** diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index a3eb66066..d00903703 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -101,7 +102,7 @@ void File::load_impl( Type* buffer, std::streamsize elements ) file.read( reinterpret_cast(buffer), sizeof(Type) * elements ); else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SourceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; std::streamsize readElements = 0; @@ -124,7 +125,7 @@ template< typename Type, void File::load_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; @@ -145,7 +146,7 @@ void File::load_impl( Type* buffer, std::streamsize elements ) } else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SourceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; @@ -192,7 +193,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) file.write( 
reinterpret_cast(buffer), sizeof(Type) * elements ); else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements ); using BaseType = typename std::remove_cv< TargetType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; std::streamsize writtenElements = 0; @@ -216,7 +217,7 @@ template< typename Type, void File::save_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; @@ -237,7 +238,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) } else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements ); using BaseType = typename std::remove_cv< TargetType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; -- GitLab From e2ac7194b542a44145b5b5f0666c1904c7ddd350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 12 Oct 2019 12:33:56 +0200 Subject: [PATCH 13/35] Cleaned up Devices::Cuda --- src/TNL/Devices/Cuda.h | 24 +++++++++++++--- src/TNL/Devices/Cuda_impl.h | 55 ------------------------------------- 2 files changed, 20 insertions(+), 59 deletions(-) delete mode 100644 src/TNL/Devices/Cuda_impl.h diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h 
index e1dd264b4..2b3bf8c66 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -20,13 +20,29 @@ namespace Devices { class Cuda { public: - static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); + static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" ) + { +#ifdef HAVE_CUDA + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation.", 0 ); +#else + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 ); +#endif + } static inline bool setup( const Config::ParameterContainer& parameters, - const String& prefix = "" ); + const String& prefix = "" ) + { +#ifdef HAVE_CUDA + int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); + if( cudaSetDevice( cudaDevice ) != cudaSuccess ) + { + std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl; + return false; + } +#endif + return true; + } }; } // namespace Devices } // namespace TNL - -#include diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h deleted file mode 100644 index ae6bbcd17..000000000 --- a/src/TNL/Devices/Cuda_impl.h +++ /dev/null @@ -1,55 +0,0 @@ -/*************************************************************************** - Cuda_impl.h - description - ------------------- - begin : Jan 21, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace TNL { -namespace Devices { - -inline void -Cuda::configSetup( Config::ConfigDescription& config, - const String& prefix ) -{ -#ifdef HAVE_CUDA - config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the 
computation.", 0 ); -#else - config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 ); -#endif -} - -inline bool -Cuda::setup( const Config::ParameterContainer& parameters, - const String& prefix ) -{ -#ifdef HAVE_CUDA - int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); - if( cudaSetDevice( cudaDevice ) != cudaSuccess ) - { - std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl; - return false; - } - Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().reset(); - Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().stop(); -#endif - return true; -} - -} // namespace Devices -} // namespace TNL -- GitLab From dacc171122a72c2bacbfbb8909187ce2925044b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 21 Aug 2019 16:58:56 +0200 Subject: [PATCH 14/35] Moved SystemInfo class out of the Devices namespace It has nothing to do with devices. 
--- src/Benchmarks/Benchmarks.h | 22 +++++++++---------- src/TNL/Logger_impl.h | 22 +++++++++---------- src/TNL/{Devices => }/SystemInfo.h | 4 +--- .../SystemInfo_impl.h => SystemInfo.hpp} | 4 +--- 4 files changed, 24 insertions(+), 28 deletions(-) rename src/TNL/{Devices => }/SystemInfo.h (95%) rename src/TNL/{Devices/SystemInfo_impl.h => SystemInfo.hpp} (98%) diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h index 3822fef28..67010118e 100644 --- a/src/Benchmarks/Benchmarks.h +++ b/src/Benchmarks/Benchmarks.h @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include @@ -333,7 +333,7 @@ protected: Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; - Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); + const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + convertToString( cacheSizes.L1instruction ) + ", " + convertToString( cacheSizes.L2 ) + ", " @@ -344,11 +344,11 @@ Benchmark::MetadataMap getHardwareMetadata() convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { - { "host name", Devices::SystemInfo::getHostname() }, - { "architecture", Devices::SystemInfo::getArchitecture() }, - { "system", Devices::SystemInfo::getSystemName() }, - { "system release", Devices::SystemInfo::getSystemRelease() }, - { "start time", Devices::SystemInfo::getCurrentTime() }, + { "host name", SystemInfo::getHostname() }, + { "architecture", SystemInfo::getArchitecture() }, + { "system", SystemInfo::getSystemName() }, + { "system release", SystemInfo::getSystemRelease() }, + { "start time", SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", convertToString( (Communicators::MpiCommunicator::IsInitialized()) ? 
Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) @@ -356,10 +356,10 @@ Benchmark::MetadataMap getHardwareMetadata() #endif { "OpenMP enabled", convertToString( Devices::Host::isOMPEnabled() ) }, { "OpenMP threads", convertToString( Devices::Host::getMaxThreadsCount() ) }, - { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, - { "CPU cores", convertToString( Devices::SystemInfo::getNumberOfCores( cpu_id ) ) }, - { "CPU threads per core", convertToString( Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) ) }, - { "CPU max frequency (MHz)", convertToString( Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, + { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) }, + { "CPU cores", convertToString( SystemInfo::getNumberOfCores( cpu_id ) ) }, + { "CPU threads per core", convertToString( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) }, + { "CPU max frequency (MHz)", convertToString( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, diff --git a/src/TNL/Logger_impl.h b/src/TNL/Logger_impl.h index 6a3da0f96..6f71b4027 100644 --- a/src/TNL/Logger_impl.h +++ b/src/TNL/Logger_impl.h @@ -15,7 +15,7 @@ #include #include -#include +#include namespace TNL { @@ -61,24 +61,24 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) const char* compiler_name = "(unknown)"; #endif - writeParameter< String >( "Host name:", Devices::SystemInfo::getHostname() ); - writeParameter< String >( "System:", Devices::SystemInfo::getSystemName() ); - writeParameter< String >( "Release:", Devices::SystemInfo::getSystemRelease() ); - writeParameter< String >( "Architecture:", Devices::SystemInfo::getArchitecture() ); + writeParameter< String >( "Host name:", 
SystemInfo::getHostname() ); + writeParameter< String >( "System:", SystemInfo::getSystemName() ); + writeParameter< String >( "Release:", SystemInfo::getSystemRelease() ); + writeParameter< String >( "Architecture:", SystemInfo::getArchitecture() ); writeParameter< String >( "TNL compiler:", compiler_name ); // FIXME: generalize for multi-socket systems, here we consider only the first found CPU const int cpu_id = 0; - const int threads = Devices::SystemInfo::getNumberOfThreads( cpu_id ); - const int cores = Devices::SystemInfo::getNumberOfCores( cpu_id ); + const int threads = SystemInfo::getNumberOfThreads( cpu_id ); + const int cores = SystemInfo::getNumberOfCores( cpu_id ); int threadsPerCore = 0; if( cores > 0 ) threadsPerCore = threads / cores; writeParameter< String >( "CPU info", "" ); - writeParameter< String >( "Model name:", Devices::SystemInfo::getCPUModelName( cpu_id ), 1 ); + writeParameter< String >( "Model name:", SystemInfo::getCPUModelName( cpu_id ), 1 ); writeParameter< int >( "Cores:", cores, 1 ); writeParameter< int >( "Threads per core:", threadsPerCore, 1 ); - writeParameter< double >( "Max clock rate (in MHz):", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 ); - const Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); + writeParameter< double >( "Max clock rate (in MHz):", SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 ); + const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); const String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + convertToString( cacheSizes.L1instruction ) + ", " + convertToString( cacheSizes.L2 ) + ", " @@ -116,7 +116,7 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) inline void Logger::writeCurrentTime( const char* label ) { - writeParameter< String >( label, Devices::SystemInfo::getCurrentTime() ); + writeParameter< String >( label, SystemInfo::getCurrentTime() ); } template< typename T > diff --git 
a/src/TNL/Devices/SystemInfo.h b/src/TNL/SystemInfo.h similarity index 95% rename from src/TNL/Devices/SystemInfo.h rename to src/TNL/SystemInfo.h index f62321d6f..e64418a7c 100644 --- a/src/TNL/Devices/SystemInfo.h +++ b/src/TNL/SystemInfo.h @@ -15,7 +15,6 @@ #include namespace TNL { -namespace Devices { struct CacheSizes { int L1instruction = 0; @@ -68,7 +67,6 @@ protected: } }; -} // namespace Devices } // namespace TNL -#include +#include diff --git a/src/TNL/Devices/SystemInfo_impl.h b/src/TNL/SystemInfo.hpp similarity index 98% rename from src/TNL/Devices/SystemInfo_impl.h rename to src/TNL/SystemInfo.hpp index 0bc426011..b46234418 100644 --- a/src/TNL/Devices/SystemInfo_impl.h +++ b/src/TNL/SystemInfo.hpp @@ -18,10 +18,9 @@ #include #include -#include +#include namespace TNL { -namespace Devices { inline String SystemInfo::getHostname( void ) @@ -215,5 +214,4 @@ SystemInfo::parseCPUInfo( void ) return info; } -} // namespace Devices } // namespace TNL -- GitLab From 7a5840de5c466077a69257931baaad173c7100c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 22 Aug 2019 18:08:43 +0200 Subject: [PATCH 15/35] Benchmarks: added benchmarks for array copy and compare using memcpy and memcmp --- src/Benchmarks/BLAS/array-operations.h | 32 ++++++++++++++++++++++++ src/Benchmarks/BLAS/tnl-benchmark-blas.h | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/Benchmarks/BLAS/array-operations.h b/src/Benchmarks/BLAS/array-operations.h index cff60c8cc..84767a7b1 100644 --- a/src/Benchmarks/BLAS/array-operations.h +++ b/src/Benchmarks/BLAS/array-operations.h @@ -12,6 +12,8 @@ #pragma once +#include + #include "../Benchmarks.h" #include @@ -66,6 +68,36 @@ benchmarkArrayOperations( Benchmark & benchmark, reset12(); + if( std::is_fundamental< Real >::value ) { + // std::memcmp + auto compareHost = [&]() { + if( std::memcmp( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ) == 0 ) + resultHost = true; 
+ else + resultHost = false; + }; + benchmark.setOperation( "comparison (memcmp)", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset12, "CPU", compareHost ); + + // std::memcpy and cudaMemcpy + auto copyHost = [&]() { + std::memcpy( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ); + }; + benchmark.setOperation( "copy (memcpy)", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset12, "CPU", copyHost ); +#ifdef HAVE_CUDA + auto copyCuda = [&]() { + cudaMemcpy( deviceArray.getData(), + deviceArray2.getData(), + deviceArray.getSize() * sizeof(Real), + cudaMemcpyDeviceToDevice ); + TNL_CHECK_CUDA_DEVICE; + }; + benchmark.time< Devices::Cuda >( reset12, "GPU", copyCuda ); +#endif + } + + auto compareHost = [&]() { resultHost = (int) ( hostArray == hostArray2 ); }; diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h index b510c3837..a1bd3e92b 100644 --- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h +++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h @@ -182,7 +182,7 @@ main( int argc, char* argv[] ) runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow ); if( ! benchmark.save( logFile ) ) { - std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; + std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." 
<< std::endl; return EXIT_FAILURE; } -- GitLab From f8c8673d5c8533c3ceb3d99a3915bbbf3067f941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 22 Aug 2019 18:09:51 +0200 Subject: [PATCH 16/35] ArrayOperations: added missing methods for the static/sequential specialization --- .../Containers/Algorithms/ArrayOperations.h | 22 ++++++++ .../Algorithms/ArrayOperationsStatic.hpp | 55 +++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Containers/Algorithms/ArrayOperations.h index d4c35f5b1..646fb03f1 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperations.h +++ b/src/TNL/Containers/Algorithms/ArrayOperations.h @@ -48,6 +48,14 @@ struct ArrayOperations< void > const SourceElement* source, const Index size ); + template< typename DestinationElement, + typename Index, + typename SourceIterator > + static void copyFromIterator( DestinationElement* destination, + Index destinationSize, + SourceIterator first, + SourceIterator last ); + template< typename Element1, typename Element2, typename Index > @@ -55,6 +63,20 @@ struct ArrayOperations< void > static bool compare( const Element1* destination, const Element2* source, const Index size ); + + template< typename Element, + typename Index > + __cuda_callable__ + static bool containsValue( const Element* data, + const Index size, + const Element& value ); + + template< typename Element, + typename Index > + __cuda_callable__ + static bool containsOnlyValue( const Element* data, + const Index size, + const Element& value ); }; template<> diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp index d84933bde..42ecf0f86 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp @@ -61,6 +61,23 @@ copy( DestinationElement* destination, destination[ i ] = source[ i ]; } +template< 
typename DestinationElement, + typename Index, + typename SourceIterator > +void +ArrayOperations< void >:: +copyFromIterator( DestinationElement* destination, + Index destinationSize, + SourceIterator first, + SourceIterator last ) +{ + Index i = 0; + while( i < destinationSize && first != last ) + destination[ i++ ] = *first++; + if( first != last ) + throw std::length_error( "Source iterator is larger than the destination array." ); +} + template< typename Element1, typename Element2, typename Index > @@ -77,6 +94,44 @@ compare( const Element1* destination, return true; } +template< typename Element, + typename Index > +__cuda_callable__ +bool +ArrayOperations< void >:: +containsValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); + TNL_ASSERT_GE( size, 0, "" ); + + for( Index i = 0; i < size; i++ ) + if( data[ i ] == value ) + return true; + return false; +} + +template< typename Element, + typename Index > +__cuda_callable__ +bool +ArrayOperations< void >:: +containsOnlyValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); + TNL_ASSERT_GE( size, 0, "" ); + + for( Index i = 0; i < size; i++ ) + if( ! 
( data[ i ] == value ) ) + return false; + return true; +} + } // namespace Algorithms } // namespace Containers } // namespace TNL -- GitLab From 986e25fc252fcf0b9300632789fb9d439d7e6370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 22 Aug 2019 18:11:22 +0200 Subject: [PATCH 17/35] ArrayOperations: using more parallel algorithms and suitable sequential fallbacks - cudaMemcpy is slower than our ParallelFor kernel for CUDA - use std::copy and std::equal instead of memcpy and memcmp, but only as sequential fallbacks - use parallel algorithms for containsValue and containsOnlyValue (again with sequential fallbacks) --- .../Algorithms/ArrayOperationsCuda.hpp | 25 ++---- .../Algorithms/ArrayOperationsHost.hpp | 77 +++++++++---------- 2 files changed, 41 insertions(+), 61 deletions(-) diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp index 5e97f1ac2..6c9dbc55d 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp @@ -73,26 +73,13 @@ copy( DestinationElement* destination, if( size == 0 ) return; TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." 
); - if( std::is_same< DestinationElement, SourceElement >::value ) - { -#ifdef HAVE_CUDA - cudaMemcpy( destination, - source, - size * sizeof( DestinationElement ), - cudaMemcpyDeviceToDevice ); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif - } - else + + // our ParallelFor kernel is faster than cudaMemcpy + auto kernel = [destination, source] __cuda_callable__ ( Index i ) { - auto kernel = [destination, source] __cuda_callable__ ( Index i ) - { - destination[ i ] = source[ i ]; - }; - ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); - } + destination[ i ] = source[ i ]; + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); } template< typename DestinationElement, diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp index 335144414..98a1c364e 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include // std::copy, std::equal #include #include @@ -67,29 +67,21 @@ copy( DestinationElement* destination, const Index size ) { if( size == 0 ) return; - if( std::is_same< DestinationElement, SourceElement >::value && - ( std::is_fundamental< DestinationElement >::value || - std::is_pointer< DestinationElement >::value ) ) - { - // GCC 8.1 complains that we bypass a non-trivial copy-constructor - // (in C++17 we could use constexpr if to avoid compiling this branch in that case) - #if defined(__GNUC__) && ( __GNUC__ > 8 || ( __GNUC__ == 8 && __GNUC_MINOR__ > 0 ) ) && !defined(__clang__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wclass-memaccess" - #endif - memcpy( destination, source, size * sizeof( DestinationElement ) ); - #if defined(__GNUC__) && !defined(__clang__) && !defined(__NVCC__) - #pragma GCC diagnostic pop - #endif - } - else - { + TNL_ASSERT_TRUE( destination, "Attempted to copy 
data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); + + // our ParallelFor version is faster than std::copy iff we use more than 1 thread + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto kernel = [destination, source]( Index i ) { destination[ i ] = source[ i ]; }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } + else { + // std::copy usually uses std::memcpy for TriviallyCopyable types + std::copy( source, source + size, destination ); + } } template< typename DestinationElement, @@ -102,11 +94,7 @@ copyFromIterator( DestinationElement* destination, SourceIterator first, SourceIterator last ) { - Index i = 0; - while( i < destinationSize && first != last ) - destination[ i++ ] = *first++; - if( first != last ) - throw std::length_error( "Source iterator is larger than the destination array." ); + ArrayOperations< void >::copyFromIterator( destination, destinationSize, first, last ); } @@ -122,18 +110,15 @@ compare( const DestinationElement* destination, if( size == 0 ) return true; TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); - if( std::is_same< DestinationElement, SourceElement >::value && - ( std::is_fundamental< DestinationElement >::value || - std::is_pointer< DestinationElement >::value ) ) - { - if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 ) - return false; + + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { + auto fetch = [destination, source] ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; + return Reduction< Devices::Host >::reduce( size, std::logical_and<>{}, fetch, true ); + } + else { + // sequential algorithm can return as soon as it finds a mismatch + return std::equal( source, source + size, destination ); } - else - for( Index i = 0; i < size; i++ ) - if( ! 
( destination[ i ] == source[ i ] ) ) - return false; - return true; } template< typename Element, @@ -148,10 +133,14 @@ containsValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); - for( Index i = 0; i < size; i++ ) - if( data[ i ] == value ) - return true; - return false; + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { + auto fetch = [=] ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Host >::reduce( size, std::logical_or<>{}, fetch, false ); + } + else { + // sequential algorithm can return as soon as it finds a match + return ArrayOperations< void >::containsValue( data, size, value ); + } } template< typename Element, @@ -166,10 +155,14 @@ containsOnlyValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); - for( Index i = 0; i < size; i++ ) - if( ! ( data[ i ] == value ) ) - return false; - return true; + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { + auto fetch = [data, value] ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Host >::reduce( size, std::logical_and<>{}, fetch, true ); + } + else { + // sequential algorithm can return as soon as it finds a mismatch + return ArrayOperations< void >::containsOnlyValue( data, size, value ); + } } } // namespace Algorithms -- GitLab From 57db358c278127f124e5f5f1a281054cca588b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Sep 2019 18:42:10 +0200 Subject: [PATCH 18/35] Split ArrayOperations into MemoryOperations and MultiDeviceMemoryOperations This will be necessary to avoid code bloat with more than 2 devices (execution types). 
--- .../Containers/Algorithms/ArrayAssignment.h | 7 +- .../Algorithms/ArrayOperationsCuda.hpp | 320 ------------------ .../Algorithms/CudaReductionKernel.h | 8 +- .../{ArrayOperations.h => MemoryOperations.h} | 58 +--- .../Algorithms/MemoryOperationsCuda.hpp | 160 +++++++++ ...tionsHost.hpp => MemoryOperationsHost.hpp} | 27 +- ...tic.hpp => MemoryOperationsSequential.hpp} | 20 +- .../Algorithms/MultiDeviceMemoryOperations.h | 277 +++++++++++++++ .../Containers/Algorithms/Multireduction.hpp | 4 +- src/TNL/Containers/Algorithms/Reduction.hpp | 8 +- src/TNL/Containers/Array.hpp | 35 +- src/TNL/Containers/ArrayView.hpp | 17 +- src/TNL/Containers/Expressions/Comparison.h | 4 +- src/TNL/Containers/NDArrayView.h | 11 +- src/TNL/Matrices/MatrixOperations.h | 2 +- src/TNL/Solvers/Linear/GMRES_impl.h | 4 +- src/UnitTests/AllocatorsTest.h | 6 +- .../Containers/ArrayOperationsTest.cpp | 11 - .../Containers/ArrayOperationsTest.cu | 11 - src/UnitTests/Containers/CMakeLists.txt | 12 +- .../Containers/MemoryOperationsTest.cpp | 1 + .../Containers/MemoryOperationsTest.cu | 1 + ...perationsTest.h => MemoryOperationsTest.h} | 173 +++++----- 23 files changed, 621 insertions(+), 556 deletions(-) delete mode 100644 src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp rename src/TNL/Containers/Algorithms/{ArrayOperations.h => MemoryOperations.h} (77%) create mode 100644 src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp rename src/TNL/Containers/Algorithms/{ArrayOperationsHost.hpp => MemoryOperationsHost.hpp} (87%) rename src/TNL/Containers/Algorithms/{ArrayOperationsStatic.hpp => MemoryOperationsSequential.hpp} (89%) create mode 100644 src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h delete mode 100644 src/UnitTests/Containers/ArrayOperationsTest.cpp delete mode 100644 src/UnitTests/Containers/ArrayOperationsTest.cu create mode 100644 src/UnitTests/Containers/MemoryOperationsTest.cpp create mode 100644 src/UnitTests/Containers/MemoryOperationsTest.cu rename 
src/UnitTests/Containers/{ArrayOperationsTest.h => MemoryOperationsTest.h} (58%) diff --git a/src/TNL/Containers/Algorithms/ArrayAssignment.h b/src/TNL/Containers/Algorithms/ArrayAssignment.h index 9a67a36b9..402ebce5f 100644 --- a/src/TNL/Containers/Algorithms/ArrayAssignment.h +++ b/src/TNL/Containers/Algorithms/ArrayAssignment.h @@ -11,7 +11,8 @@ #pragma once #include -#include +#include +#include namespace TNL { namespace Containers { @@ -39,7 +40,7 @@ struct ArrayAssignment< Array, T, true > { TNL_ASSERT_EQ( a.getSize(), t.getSize(), "The sizes of the arrays must be equal." ); if( t.getSize() > 0 ) // we allow even assignment of empty arrays - ArrayOperations< typename Array::DeviceType, typename T::DeviceType >::template + MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template copy< typename Array::ValueType, typename T::ValueType, typename Array::IndexType > ( a.getArrayData(), t.getArrayData(), t.getSize() ); } @@ -60,7 +61,7 @@ struct ArrayAssignment< Array, T, false > static void assign( Array& a, const T& t ) { TNL_ASSERT_FALSE( a.empty(), "Cannot assign value to empty array." 
); - ArrayOperations< typename Array::DeviceType >::template + MemoryOperations< typename Array::DeviceType >::template set< typename Array::ValueType, typename Array::IndexType > ( a.getArrayData(), ( typename Array::ValueType ) t, a.getSize() ); } diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp deleted file mode 100644 index 6c9dbc55d..000000000 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ /dev/null @@ -1,320 +0,0 @@ -/*************************************************************************** - ArrayOperationsCuda.hpp - description - ------------------- - begin : Jul 16, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace TNL { -namespace Containers { -namespace Algorithms { - -template< typename Element > -void -ArrayOperations< Devices::Cuda >:: -setElement( Element* data, - const Element& value ) -{ - TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); - ArrayOperations< Devices::Cuda >::set( data, value, 1 ); -} - -template< typename Element > -Element -ArrayOperations< Devices::Cuda >:: -getElement( const Element* data ) -{ - TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); - Element result; - ArrayOperations< Devices::Host, Devices::Cuda >::copy< Element, Element, int >( &result, data, 1 ); - return result; -} - -template< typename Element, typename Index > -void -ArrayOperations< Devices::Cuda >:: -set( Element* data, - const Element& value, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." 
); - auto kernel = [data, value] __cuda_callable__ ( Index i ) - { - data[ i ] = value; - }; - ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); -} - -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Cuda >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); - - // our ParallelFor kernel is faster than cudaMemcpy - auto kernel = [destination, source] __cuda_callable__ ( Index i ) - { - destination[ i ] = source[ i ]; - }; - ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); -} - -template< typename DestinationElement, - typename Index, - typename SourceIterator > -void -ArrayOperations< Devices::Cuda >:: -copyFromIterator( DestinationElement* destination, - Index destinationSize, - SourceIterator first, - SourceIterator last ) -{ - using BaseType = typename std::remove_cv< DestinationElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; - Index copiedElements = 0; - while( copiedElements < destinationSize && first != last ) { - Index i = 0; - while( i < Cuda::getTransferBufferSize() && first != last ) - buffer[ i++ ] = *first++; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( &destination[ copiedElements ], buffer.get(), i ); - copiedElements += i; - } - if( first != last ) - throw std::length_error( "Source iterator is larger than the destination array." ); -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Cuda >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - if( size == 0 ) return true; - TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." 
); - TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); - - auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; - return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::Cuda >:: -containsValue( const Element* data, - const Index size, - const Element& value ) -{ - if( size == 0 ) return false; - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "" ); - - auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false ); -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::Cuda >:: -containsOnlyValue( const Element* data, - const Index size, - const Element& value ) -{ - if( size == 0 ) return false; - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); - TNL_ASSERT_GE( size, 0, "" ); - - auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); -} - - -/**** - * Operations CUDA -> Host - */ -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Host, Devices::Cuda >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." 
); -#ifdef HAVE_CUDA - if( std::is_same< DestinationElement, SourceElement >::value ) - { - if( cudaMemcpy( destination, - source, - size * sizeof( DestinationElement ), - cudaMemcpyDeviceToHost ) != cudaSuccess ) - std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - } - else - { - using BaseType = typename std::remove_cv< SourceElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; - Index i( 0 ); - while( i < size ) - { - if( cudaMemcpy( (void*) buffer.get(), - (void*) &source[ i ], - TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ), - cudaMemcpyDeviceToHost ) != cudaSuccess ) - std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - Index j( 0 ); - while( j < Cuda::getTransferBufferSize() && i + j < size ) - { - destination[ i + j ] = buffer[ j ]; - j++; - } - i += j; - } - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Host, Devices::Cuda >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - if( size == 0 ) return true; - /*** - * Here, destination is on host and source is on CUDA device. - */ - TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." 
); -#ifdef HAVE_CUDA - std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] }; - Index compared( 0 ); - while( compared < size ) - { - Index transfer = min( size - compared, Cuda::getTransferBufferSize() ); - if( cudaMemcpy( (void*) host_buffer.get(), - (void*) &source[ compared ], - transfer * sizeof( Element2 ), - cudaMemcpyDeviceToHost ) != cudaSuccess ) - std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - if( ! ArrayOperations< Devices::Host >::compare( &destination[ compared ], host_buffer.get(), transfer ) ) - return false; - compared += transfer; - } - return true; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -/**** - * Operations Host -> CUDA - */ -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Cuda, Devices::Host >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); -#ifdef HAVE_CUDA - if( std::is_same< DestinationElement, SourceElement >::value ) - { - if( cudaMemcpy( destination, - source, - size * sizeof( DestinationElement ), - cudaMemcpyHostToDevice ) != cudaSuccess ) - std::cerr << "Transfer of data from host to CUDA device failed." 
<< std::endl; - TNL_CHECK_CUDA_DEVICE; - } - else - { - std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] }; - Index i( 0 ); - while( i < size ) - { - Index j( 0 ); - while( j < Cuda::getTransferBufferSize() && i + j < size ) - { - buffer[ j ] = source[ i + j ]; - j++; - } - if( cudaMemcpy( (void*) &destination[ i ], - (void*) buffer.get(), - j * sizeof( DestinationElement ), - cudaMemcpyHostToDevice ) != cudaSuccess ) - std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - i += j; - } - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Cuda, Devices::Host >:: -compare( const Element1* hostData, - const Element2* deviceData, - const Index size ) -{ - if( size == 0 ) return true; - TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." 
); - return ArrayOperations< Devices::Host, Devices::Cuda >::compare( deviceData, hostData, size ); -} - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 3e948a906..2a959cb9c 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include namespace TNL { @@ -352,7 +352,7 @@ struct CudaReductionKernelLauncher // Copy result on CPU Result result; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 ); return result; } @@ -385,8 +385,8 @@ struct CudaReductionKernelLauncher //// // Copy result on CPU std::pair< Index, Result > result; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 ); return result; } diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Containers/Algorithms/MemoryOperations.h similarity index 77% rename from src/TNL/Containers/Algorithms/ArrayOperations.h rename to src/TNL/Containers/Algorithms/MemoryOperations.h index 646fb03f1..de588484f 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperations.h +++ b/src/TNL/Containers/Algorithms/MemoryOperations.h @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperations.h - description + MemoryOperations.h - description ------------------- begin : Jul 15, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -12,18 +12,18 @@ #include #include 
+#include namespace TNL { namespace Containers { namespace Algorithms { -template< typename DestinationDevice, - typename SourceDevice = DestinationDevice > -struct ArrayOperations; +template< typename DestinationExecution > +struct MemoryOperations; -// TODO: establish the concept of a "void device" for static computations in the whole TNL +// TODO: change "void" to "Execution::Sequential" template<> -struct ArrayOperations< void > +struct MemoryOperations< void > { template< typename Element > __cuda_callable__ @@ -80,7 +80,7 @@ struct ArrayOperations< void > }; template<> -struct ArrayOperations< Devices::Host > +struct MemoryOperations< Devices::Host > { template< typename Element > static void setElement( Element* data, @@ -130,7 +130,7 @@ struct ArrayOperations< Devices::Host > }; template<> -struct ArrayOperations< Devices::Cuda > +struct MemoryOperations< Devices::Cuda > { template< typename Element > static void setElement( Element* data, @@ -179,46 +179,10 @@ struct ArrayOperations< Devices::Cuda > const Element& value ); }; -template<> -struct ArrayOperations< Devices::Cuda, Devices::Host > -{ - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static bool compare( const DestinationElement* destination, - const SourceElement* source, - const Index size ); -}; - -template<> -struct ArrayOperations< Devices::Host, Devices::Cuda > -{ - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename Element1, - typename Element2, - typename Index > - static bool compare( const Element1* destination, - const Element2* source, - const Index size ); -}; - } // namespace Algorithms } // 
namespace Containers } // namespace TNL -#include -#include -#include +#include +#include +#include diff --git a/src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp new file mode 100644 index 000000000..a504b5b76 --- /dev/null +++ b/src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp @@ -0,0 +1,160 @@ +/*************************************************************************** + MemoryOperationsCuda.hpp - description + ------------------- + begin : Jul 16, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include // std::unique_ptr +#include + +#include +#include +#include +#include +#include + +namespace TNL { +namespace Containers { +namespace Algorithms { + +template< typename Element > +void +MemoryOperations< Devices::Cuda >:: +setElement( Element* data, + const Element& value ) +{ + TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); + MemoryOperations< Devices::Cuda >::set( data, value, 1 ); +} + +template< typename Element > +Element +MemoryOperations< Devices::Cuda >:: +getElement( const Element* data ) +{ + TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); + Element result; + MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 ); + return result; +} + +template< typename Element, typename Index > +void +MemoryOperations< Devices::Cuda >:: +set( Element* data, + const Element& value, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." 
); + auto kernel = [data, value] __cuda_callable__ ( Index i ) + { + data[ i ] = value; + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); +} + +template< typename DestinationElement, + typename SourceElement, + typename Index > +void +MemoryOperations< Devices::Cuda >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); + + // our ParallelFor kernel is faster than cudaMemcpy + auto kernel = [destination, source] __cuda_callable__ ( Index i ) + { + destination[ i ] = source[ i ]; + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); +} + +template< typename DestinationElement, + typename Index, + typename SourceIterator > +void +MemoryOperations< Devices::Cuda >:: +copyFromIterator( DestinationElement* destination, + Index destinationSize, + SourceIterator first, + SourceIterator last ) +{ + using BaseType = typename std::remove_cv< DestinationElement >::type; + std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; + Index copiedElements = 0; + while( copiedElements < destinationSize && first != last ) { + Index i = 0; + while( i < Cuda::getTransferBufferSize() && first != last ) + buffer[ i++ ] = *first++; + MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i ); + copiedElements += i; + } + if( first != last ) + throw std::length_error( "Source iterator is larger than the destination array." ); +} + +template< typename Element1, + typename Element2, + typename Index > +bool +MemoryOperations< Devices::Cuda >:: +compare( const Element1* destination, + const Element2* source, + const Index size ) +{ + if( size == 0 ) return true; + TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." 
); + TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); + + auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; + return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); +} + +template< typename Element, + typename Index > +bool +MemoryOperations< Devices::Cuda >:: +containsValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "" ); + + auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false ); +} + +template< typename Element, + typename Index > +bool +MemoryOperations< Devices::Cuda >:: +containsOnlyValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." 
); + TNL_ASSERT_GE( size, 0, "" ); + + auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); +} + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp b/src/TNL/Containers/Algorithms/MemoryOperationsHost.hpp similarity index 87% rename from src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp rename to src/TNL/Containers/Algorithms/MemoryOperationsHost.hpp index 98a1c364e..80be4cc3d 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp +++ b/src/TNL/Containers/Algorithms/MemoryOperationsHost.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperationsHost.hpp - description + MemoryOperationsHost.hpp - description ------------------- begin : Jul 16, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -14,8 +14,8 @@ #include #include // std::copy, std::equal +#include #include -#include #include namespace TNL { @@ -24,7 +24,7 @@ namespace Algorithms { template< typename Element > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: setElement( Element* data, const Element& value ) { @@ -34,7 +34,7 @@ setElement( Element* data, template< typename Element > Element -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: getElement( const Element* data ) { TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." 
); @@ -43,7 +43,7 @@ getElement( const Element* data ) template< typename Element, typename Index > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: set( Element* data, const Element& value, const Index size ) @@ -61,7 +61,7 @@ template< typename DestinationElement, typename SourceElement, typename Index > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: copy( DestinationElement* destination, const SourceElement* source, const Index size ) @@ -88,21 +88,20 @@ template< typename DestinationElement, typename Index, typename SourceIterator > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, SourceIterator last ) { - ArrayOperations< void >::copyFromIterator( destination, destinationSize, first, last ); + MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last ); } - template< typename DestinationElement, typename SourceElement, typename Index > bool -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: compare( const DestinationElement* destination, const SourceElement* source, const Index size ) @@ -124,7 +123,7 @@ compare( const DestinationElement* destination, template< typename Element, typename Index > bool -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: containsValue( const Element* data, const Index size, const Element& value ) @@ -139,14 +138,14 @@ containsValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a match - return ArrayOperations< void >::containsValue( data, size, value ); + return MemoryOperations< void >::containsValue( data, size, value ); } } template< typename Element, typename Index > bool -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: containsOnlyValue( const Element* data, const Index size, const Element& value ) @@ -161,7 
+160,7 @@ containsOnlyValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a mismatch - return ArrayOperations< void >::containsOnlyValue( data, size, value ); + return MemoryOperations< void >::containsOnlyValue( data, size, value ); } } diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp b/src/TNL/Containers/Algorithms/MemoryOperationsSequential.hpp similarity index 89% rename from src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp rename to src/TNL/Containers/Algorithms/MemoryOperationsSequential.hpp index 42ecf0f86..17d73cf78 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp +++ b/src/TNL/Containers/Algorithms/MemoryOperationsSequential.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperationsStatic.hpp - description + MemoryOperationsSequential.hpp - description ------------------- begin : Apr 8, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. 
@@ -10,7 +10,7 @@ #pragma once -#include +#include namespace TNL { namespace Containers { @@ -19,7 +19,7 @@ namespace Algorithms { template< typename Element > __cuda_callable__ void -ArrayOperations< void >:: +MemoryOperations< void >:: setElement( Element* data, const Element& value ) { @@ -29,7 +29,7 @@ setElement( Element* data, template< typename Element > __cuda_callable__ Element -ArrayOperations< void >:: +MemoryOperations< void >:: getElement( const Element* data ) { return *data; @@ -38,7 +38,7 @@ getElement( const Element* data ) template< typename Element, typename Index > __cuda_callable__ void -ArrayOperations< void >:: +MemoryOperations< void >:: set( Element* data, const Element& value, const Index size ) @@ -52,7 +52,7 @@ template< typename DestinationElement, typename Index > __cuda_callable__ void -ArrayOperations< void >:: +MemoryOperations< void >:: copy( DestinationElement* destination, const SourceElement* source, const Index size ) @@ -65,7 +65,7 @@ template< typename DestinationElement, typename Index, typename SourceIterator > void -ArrayOperations< void >:: +MemoryOperations< void >:: copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, @@ -83,7 +83,7 @@ template< typename Element1, typename Index > __cuda_callable__ bool -ArrayOperations< void >:: +MemoryOperations< void >:: compare( const Element1* destination, const Element2* source, const Index size ) @@ -98,7 +98,7 @@ template< typename Element, typename Index > __cuda_callable__ bool -ArrayOperations< void >:: +MemoryOperations< void >:: containsValue( const Element* data, const Index size, const Element& value ) @@ -117,7 +117,7 @@ template< typename Element, typename Index > __cuda_callable__ bool -ArrayOperations< void >:: +MemoryOperations< void >:: containsOnlyValue( const Element* data, const Index size, const Element& value ) diff --git a/src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h 
b/src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h new file mode 100644 index 000000000..4809cae0d --- /dev/null +++ b/src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h @@ -0,0 +1,277 @@ +/*************************************************************************** + MultiDeviceMemoryOperations.h - description + ------------------- + begin : Aug 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { +namespace Containers { +namespace Algorithms { + +template< typename DestinationDevice, + typename SourceDevice = DestinationDevice > +struct MultiDeviceMemoryOperations +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + // use DestinationDevice, unless it is void + using Device = std::conditional_t< std::is_void< DestinationDevice >::value, SourceDevice, DestinationDevice >; + MemoryOperations< Device >::copy( destination, source, size ); + } + + template< typename DestinationElement, + typename SourceElement, + typename Index > + static bool compare( const DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + // use DestinationDevice, unless it is void + using Device = std::conditional_t< std::is_void< DestinationDevice >::value, SourceDevice, DestinationDevice >; + return MemoryOperations< Device >::compare( destination, source, size ); + } +}; + + +template< typename DeviceType > +struct MultiDeviceMemoryOperations< Devices::Cuda, DeviceType > +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ); + + template< 
typename DestinationElement, + typename SourceElement, + typename Index > + static bool compare( const DestinationElement* destination, + const SourceElement* source, + const Index size ); +}; + +template< typename DeviceType > +struct MultiDeviceMemoryOperations< DeviceType, Devices::Cuda > +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ); + + template< typename Element1, + typename Element2, + typename Index > + static bool compare( const Element1* destination, + const Element2* source, + const Index size ); +}; + + +// CUDA <-> CUDA to disambiguate from partial specializations below +template<> +struct MultiDeviceMemoryOperations< Devices::Cuda, Devices::Cuda > +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + MemoryOperations< Devices::Cuda >::copy( destination, source, size ); + } + + template< typename DestinationElement, + typename SourceElement, + typename Index > + static bool compare( const DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + return MemoryOperations< Devices::Cuda >::compare( destination, source, size ); + } +}; + + +/**** + * Operations CUDA -> Host + */ +template< typename DeviceType > + template< typename DestinationElement, + typename SourceElement, + typename Index > +void +MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." 
); +#ifdef HAVE_CUDA + if( std::is_same< DestinationElement, SourceElement >::value ) + { + if( cudaMemcpy( destination, + source, + size * sizeof( DestinationElement ), + cudaMemcpyDeviceToHost ) != cudaSuccess ) + std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + } + else + { + using BaseType = typename std::remove_cv< SourceElement >::type; + std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; + Index i( 0 ); + while( i < size ) + { + if( cudaMemcpy( (void*) buffer.get(), + (void*) &source[ i ], + TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ), + cudaMemcpyDeviceToHost ) != cudaSuccess ) + std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + Index j( 0 ); + while( j < Cuda::getTransferBufferSize() && i + j < size ) + { + destination[ i + j ] = buffer[ j ]; + j++; + } + i += j; + } + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + + +template< typename DeviceType > + template< typename Element1, + typename Element2, + typename Index > +bool +MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >:: +compare( const Element1* destination, + const Element2* source, + const Index size ) +{ + if( size == 0 ) return true; + /*** + * Here, destination is on host and source is on CUDA device. + */ + TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." 
); +#ifdef HAVE_CUDA + std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] }; + Index compared( 0 ); + while( compared < size ) + { + Index transfer = min( size - compared, Cuda::getTransferBufferSize() ); + if( cudaMemcpy( (void*) host_buffer.get(), + (void*) &source[ compared ], + transfer * sizeof( Element2 ), + cudaMemcpyDeviceToHost ) != cudaSuccess ) + std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + if( ! MemoryOperations< Devices::Host >::compare( &destination[ compared ], host_buffer.get(), transfer ) ) + return false; + compared += transfer; + } + return true; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +/**** + * Operations Host -> CUDA + */ +template< typename DeviceType > + template< typename DestinationElement, + typename SourceElement, + typename Index > +void +MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); +#ifdef HAVE_CUDA + if( std::is_same< DestinationElement, SourceElement >::value ) + { + if( cudaMemcpy( destination, + source, + size * sizeof( DestinationElement ), + cudaMemcpyHostToDevice ) != cudaSuccess ) + std::cerr << "Transfer of data from host to CUDA device failed." 
<< std::endl; + TNL_CHECK_CUDA_DEVICE; + } + else + { + std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] }; + Index i( 0 ); + while( i < size ) + { + Index j( 0 ); + while( j < Cuda::getTransferBufferSize() && i + j < size ) + { + buffer[ j ] = source[ i + j ]; + j++; + } + if( cudaMemcpy( (void*) &destination[ i ], + (void*) buffer.get(), + j * sizeof( DestinationElement ), + cudaMemcpyHostToDevice ) != cudaSuccess ) + std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + i += j; + } + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename DeviceType > + template< typename Element1, + typename Element2, + typename Index > +bool +MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >:: +compare( const Element1* hostData, + const Element2* deviceData, + const Index size ) +{ + if( size == 0 ) return true; + TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." 
); + return MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >::compare( deviceData, hostData, size ); +} + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Multireduction.hpp b/src/TNL/Containers/Algorithms/Multireduction.hpp index 8c74ee9ac..c80f68f32 100644 --- a/src/TNL/Containers/Algorithms/Multireduction.hpp +++ b/src/TNL/Containers/Algorithms/Multireduction.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #ifdef CUDA_REDUCTION_PROFILING @@ -205,7 +205,7 @@ reduce( const Result zero, // transfer the reduced data from device to host std::unique_ptr< Result[] > resultArray{ new Result[ n * reducedSize ] }; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Containers/Algorithms/Reduction.hpp index 229af1379..19ed3e6af 100644 --- a/src/TNL/Containers/Algorithms/Reduction.hpp +++ b/src/TNL/Containers/Algorithms/Reduction.hpp @@ -17,7 +17,7 @@ //#define CUDA_REDUCTION_PROFILING #include -#include +#include #include #ifdef CUDA_REDUCTION_PROFILING @@ -310,7 +310,7 @@ reduce( const Index size, new Result[ reducedSize ] #endif }; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -415,8 +415,8 @@ reduceWithArgument( const Index size, new Index[ reducedSize ] #endif }; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize ); + 
MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 23909dd8c..7688ca194 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include #include #include @@ -74,7 +75,7 @@ Array( Value* data, : allocator( allocator ) { this->setSize( size ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), data, size ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), data, size ); } template< typename Value, @@ -85,7 +86,7 @@ Array< Value, Device, Index, Allocator >:: Array( const Array< Value, Device, Index, Allocator >& array ) { this->setSize( array.getSize() ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); } template< typename Value, @@ -98,7 +99,7 @@ Array( const Array< Value, Device, Index, Allocator >& array, : allocator( allocator ) { this->setSize( array.getSize() ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); } template< typename Value, @@ -118,7 +119,7 @@ Array( const Array< Value, Device, Index, Allocator >& array, TNL_ASSERT_LE( begin + size, array.getSize(), "End of array is out of bounds." 
); this->setSize( size ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size ); } template< typename Value, @@ -135,7 +136,7 @@ Array( const std::initializer_list< InValue >& list, // Here we assume that the underlying array for std::initializer_list is // const T[N] as noted here: // https://en.cppreference.com/w/cpp/utility/initializer_list - Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), &( *list.begin() ), list.size() ); + Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), &( *list.begin() ), list.size() ); } template< typename Value, @@ -149,7 +150,7 @@ Array( const std::list< InValue >& list, : allocator( allocator ) { this->setSize( list.size() ); - Algorithms::ArrayOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); + Algorithms::MemoryOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); } template< typename Value, @@ -163,7 +164,7 @@ Array( const std::vector< InValue >& vector, : allocator( allocator ) { this->setSize( vector.size() ); - Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); + Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); } template< typename Value, @@ -485,7 +486,7 @@ setElement( const Index& i, const Value& x ) { TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." 
); - return Algorithms::ArrayOperations< Device >::setElement( &( this->data[ i ] ), x ); + return Algorithms::MemoryOperations< Device >::setElement( &( this->data[ i ] ), x ); } template< typename Value, @@ -498,7 +499,7 @@ getElement( const Index& i ) const { TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); - return Algorithms::ArrayOperations< Device >::getElement( & ( this->data[ i ] ) ); + return Algorithms::MemoryOperations< Device >::getElement( & ( this->data[ i ] ) ); } template< typename Value, @@ -541,7 +542,7 @@ operator=( const Array< Value, Device, Index, Allocator >& array ) if( this->getSize() != array.getSize() ) this->setLike( array ); if( this->getSize() > 0 ) - Algorithms::ArrayOperations< Device >:: + Algorithms::MemoryOperations< Device >:: copy( this->getData(), array.getData(), array.getSize() ); @@ -595,7 +596,7 @@ Array< Value, Device, Index, Allocator >:: operator=( const std::list< InValue >& list ) { this->setSize( list.size() ); - Algorithms::ArrayOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); + Algorithms::MemoryOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); return *this; } @@ -610,7 +611,7 @@ operator=( const std::vector< InValue >& vector ) { if( (std::size_t) this->getSize() != vector.size() ) this->setSize( vector.size() ); - Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); + Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); return *this; } @@ -627,7 +628,7 @@ operator==( const ArrayT& array ) const return false; if( this->getSize() == 0 ) return true; - return Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >:: + return Algorithms::MultiDeviceMemoryOperations< Device, typename 
ArrayT::DeviceType >:: compare( this->getData(), array.getData(), array.getSize() ); @@ -658,7 +659,7 @@ setValue( const ValueType& v, TNL_ASSERT_TRUE( this->getData(), "Attempted to set a value of an empty array." ); if( end == 0 ) end = this->getSize(); - Algorithms::ArrayOperations< Device >::set( &this->getData()[ begin ], v, end - begin ); + Algorithms::MemoryOperations< Device >::set( &this->getData()[ begin ], v, end - begin ); } template< typename Value, @@ -690,7 +691,7 @@ containsValue( const ValueType& v, if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, v ); + return Algorithms::MemoryOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, v ); } template< typename Value, @@ -707,7 +708,7 @@ containsOnlyValue( const ValueType& v, if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, v ); + return Algorithms::MemoryOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, v ); } template< typename Value, diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index b37831004..010a40c3a 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -15,7 +15,8 @@ #include #include -#include +#include +#include #include #include @@ -100,7 +101,7 @@ operator=( const ArrayView& view ) { TNL_ASSERT_EQ( getSize(), view.getSize(), "The sizes of the array views must be equal, views are not resizable." ); if( getSize() > 0 ) - Algorithms::ArrayOperations< Device >::copy( getData(), view.getData(), getSize() ); + Algorithms::MemoryOperations< Device >::copy( getData(), view.getData(), getSize() ); return *this; } @@ -215,7 +216,7 @@ setElement( Index i, Value value ) { TNL_ASSERT_GE( i, 0, "Element index must be non-negative." 
); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); - return Algorithms::ArrayOperations< Device >::setElement( &data[ i ], value ); + return Algorithms::MemoryOperations< Device >::setElement( &data[ i ], value ); } template< typename Value, @@ -227,7 +228,7 @@ getElement( Index i ) const { TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); - return Algorithms::ArrayOperations< Device >::getElement( &data[ i ] ); + return Algorithms::MemoryOperations< Device >::getElement( &data[ i ] ); } template< typename Value, @@ -267,7 +268,7 @@ operator==( const ArrayT& array ) const return false; if( this->getSize() == 0 ) return true; - return Algorithms::ArrayOperations< DeviceType, typename ArrayT::DeviceType >:: + return Algorithms::MultiDeviceMemoryOperations< DeviceType, typename ArrayT::DeviceType >:: compare( this->getData(), array.getData(), array.getSize() ); @@ -294,7 +295,7 @@ setValue( Value value, const Index begin, Index end ) TNL_ASSERT_GT( size, 0, "Attempted to set value to an empty array view." 
); if( end == 0 ) end = this->getSize(); - Algorithms::ArrayOperations< Device >::set( &getData()[ begin ], value, end - begin ); + Algorithms::MemoryOperations< Device >::set( &getData()[ begin ], value, end - begin ); } template< typename Value, @@ -329,7 +330,7 @@ containsValue( Value value, { if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, value ); + return Algorithms::MemoryOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, value ); } template< typename Value, @@ -343,7 +344,7 @@ containsOnlyValue( Value value, { if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, value ); + return Algorithms::MemoryOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, value ); } template< typename Value, typename Device, typename Index > diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h index 616ad5807..d487948a9 100644 --- a/src/TNL/Containers/Expressions/Comparison.h +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace TNL { namespace Containers { @@ -45,7 +45,7 @@ struct VectorComparison< T1, T2, true > return false; if( a.getSize() == 0 ) return true; - return Algorithms::ArrayOperations< typename T1::DeviceType, typename T2::DeviceType >::compare( a.getData(), b.getData(), a.getSize() ); + return Algorithms::MultiDeviceMemoryOperations< typename T1::DeviceType, typename T2::DeviceType >::compare( a.getData(), b.getData(), a.getSize() ); } }; diff --git a/src/TNL/Containers/NDArrayView.h b/src/TNL/Containers/NDArrayView.h index 3e37de372..0dc2d9d7e 100644 --- a/src/TNL/Containers/NDArrayView.h +++ b/src/TNL/Containers/NDArrayView.h @@ -18,7 +18,8 @@ #include #include #include -#include +#include +#include namespace 
TNL { namespace Containers { @@ -75,7 +76,7 @@ public: { TNL_ASSERT_EQ( getSizes(), other.getSizes(), "The sizes of the array views must be equal, views are not resizable." ); if( getStorageSize() > 0 ) - Algorithms::ArrayOperations< DeviceType >::copy( array, other.array, getStorageSize() ); + Algorithms::MemoryOperations< DeviceType >::copy( array, other.array, getStorageSize() ); return *this; } @@ -93,7 +94,7 @@ public: "The sizes of the array views must be equal, views are not resizable." ); if( getStorageSize() > 0 ) { TNL_ASSERT_TRUE( array, "Attempted to assign to an empty view." ); - Algorithms::ArrayOperations< DeviceType, typename OtherView::DeviceType >::copy( array, other.getData(), getStorageSize() ); + Algorithms::MultiDeviceMemoryOperations< DeviceType, typename OtherView::DeviceType >::copy( array, other.getData(), getStorageSize() ); } return *this; } @@ -138,7 +139,7 @@ public: if( getSizes() != other.getSizes() ) return false; // FIXME: uninitialized data due to alignment in NDArray and padding in SlicedNDArray - return Algorithms::ArrayOperations< Device, Device >::compare( array, other.array, getStorageSize() ); + return Algorithms::MemoryOperations< Device >::compare( array, other.array, getStorageSize() ); } TNL_NVCC_HD_WARNING_DISABLE @@ -148,7 +149,7 @@ public: if( getSizes() != other.getSizes() ) return true; // FIXME: uninitialized data due to alignment in NDArray and padding in SlicedNDArray - return ! Algorithms::ArrayOperations< Device, Device >::compare( array, other.array, getStorageSize() ); + return ! Algorithms::MemoryOperations< Device >::compare( array, other.array, getStorageSize() ); } __cuda_callable__ diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h index a6ede3f7b..9cc7b477b 100644 --- a/src/TNL/Matrices/MatrixOperations.h +++ b/src/TNL/Matrices/MatrixOperations.h @@ -343,7 +343,7 @@ public: // TODO: use static storage, e.g. 
from the CudaReductionBuffer, to avoid frequent reallocations Containers::Vector< RealType, Devices::Cuda, IndexType > xDevice; xDevice.setSize( n ); - Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); + Containers::Algorithms::MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index 519fcb9aa..5b0915f97 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -451,7 +451,7 @@ hauseholder_apply_trunc( HostView out, // The upper (m+1)x(m+1) submatrix of Y is duplicated in the YL buffer, // which resides on host and is broadcasted from rank 0 to all processes. 
HostView YL_i( &YL[ i * (restarting_max + 1) ], restarting_max + 1 ); - Containers::Algorithms::ArrayOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() ); + Containers::Algorithms::MultiDeviceMemoryOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() ); // no-op if the problem is not distributed CommunicatorType::Bcast( YL_i.getData(), YL_i.getSize(), 0, Traits::getCommunicationGroup( *this->matrix ) ); @@ -466,7 +466,7 @@ hauseholder_apply_trunc( HostView out, } if( std::is_same< DeviceType, Devices::Cuda >::value ) { RealType host_z[ i + 1 ]; - Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 ); + Containers::Algorithms::MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 ); for( int k = 0; k <= i; k++ ) out[ k ] = host_z[ k ] - YL_i[ k ] * aux; } diff --git a/src/UnitTests/AllocatorsTest.h b/src/UnitTests/AllocatorsTest.h index 5434a4950..30d904b7a 100644 --- a/src/UnitTests/AllocatorsTest.h +++ b/src/UnitTests/AllocatorsTest.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "gtest/gtest.h" @@ -83,7 +83,7 @@ TYPED_TEST( AllocatorsTest, CudaManaged ) ASSERT_NE( data, nullptr ); // set data on the device - Containers::Algorithms::ArrayOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); + Containers::Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); // check values on the host @@ -103,7 +103,7 @@ TYPED_TEST( AllocatorsTest, Cuda ) ASSERT_NE( data, nullptr ); // set data on the device - Containers::Algorithms::ArrayOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); + Containers::Algorithms::MemoryOperations< Devices::Cuda >::set( 
data, (ValueType) 0, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); allocator.deallocate( data, ARRAY_TEST_SIZE ); diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cpp b/src/UnitTests/Containers/ArrayOperationsTest.cpp deleted file mode 100644 index c499a61b2..000000000 --- a/src/UnitTests/Containers/ArrayOperationsTest.cpp +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - ArrayOperationsTest.cpp - description - ------------------- - begin : Jul 15, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "ArrayOperationsTest.h" diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cu b/src/UnitTests/Containers/ArrayOperationsTest.cu deleted file mode 100644 index 497b40f17..000000000 --- a/src/UnitTests/Containers/ArrayOperationsTest.cu +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - ArrayOperationsTest.cu - description - ------------------- - begin : Jul 16, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "ArrayOperationsTest.h" diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index c8cd88af9..3a0643274 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -3,13 +3,13 @@ TARGET_COMPILE_OPTIONS( ListTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ListTest ${GTEST_BOTH_LIBRARIES} ) IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( ArrayOperationsTest ArrayOperationsTest.cu + CUDA_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - 
TARGET_LINK_LIBRARIES( ArrayOperationsTest ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( ArrayOperationsTest ArrayOperationsTest.cpp ) - TARGET_COMPILE_OPTIONS( ArrayOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ArrayOperationsTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp ) + TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) ADD_EXECUTABLE( ArrayTest ArrayTest.cpp ) @@ -94,7 +94,7 @@ TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} ) ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( ArrayOperationsTest ${EXECUTABLE_OUTPUT_PATH}/ArrayOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MemoryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/MemoryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/MemoryOperationsTest.cpp b/src/UnitTests/Containers/MemoryOperationsTest.cpp new file mode 100644 index 000000000..976447eef --- /dev/null +++ b/src/UnitTests/Containers/MemoryOperationsTest.cpp @@ -0,0 +1 @@ +#include "MemoryOperationsTest.h" diff --git a/src/UnitTests/Containers/MemoryOperationsTest.cu b/src/UnitTests/Containers/MemoryOperationsTest.cu new file mode 100644 index 000000000..976447eef --- /dev/null +++ b/src/UnitTests/Containers/MemoryOperationsTest.cu @@ -0,0 +1 @@ +#include "MemoryOperationsTest.h" diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/MemoryOperationsTest.h similarity index 58% rename from 
src/UnitTests/Containers/ArrayOperationsTest.h rename to src/UnitTests/Containers/MemoryOperationsTest.h index 4a48261be..6049e09bc 100644 --- a/src/UnitTests/Containers/ArrayOperationsTest.h +++ b/src/UnitTests/Containers/MemoryOperationsTest.h @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperationsTest.h - description + MemoryOperationsTest.h - description ------------------- begin : Jul 15, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -13,7 +13,8 @@ #ifdef HAVE_GTEST #include #include -#include +#include +#include #include "gtest/gtest.h" @@ -25,7 +26,7 @@ constexpr int ARRAY_TEST_SIZE = 5000; // test fixture for typed tests template< typename Value > -class ArrayOperationsTest : public ::testing::Test +class MemoryOperationsTest : public ::testing::Test { protected: using ValueType = Value; @@ -34,9 +35,9 @@ protected: // types for which ArrayTest is instantiated using ValueTypes = ::testing::Types< short int, int, long, float, double >; -TYPED_TEST_SUITE( ArrayOperationsTest, ValueTypes ); +TYPED_TEST_SUITE( MemoryOperationsTest, ValueTypes ); -TYPED_TEST( ArrayOperationsTest, allocateMemory_host ) +TYPED_TEST( MemoryOperationsTest, allocateMemory_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -48,7 +49,7 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_host ) allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, setElement_host ) +TYPED_TEST( MemoryOperationsTest, setElement_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -56,27 +57,27 @@ TYPED_TEST( ArrayOperationsTest, setElement_host ) Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) { - ArrayOperations< Devices::Host >::setElement( data + i, (ValueType) i ); + MemoryOperations< Devices::Host >::setElement( data + i, 
(ValueType) i ); EXPECT_EQ( data[ i ], i ); - EXPECT_EQ( ArrayOperations< Devices::Host >::getElement( data + i ), i ); + EXPECT_EQ( MemoryOperations< Devices::Host >::getElement( data + i ), i ); } allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, set_host ) +TYPED_TEST( MemoryOperationsTest, set_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data, (ValueType) 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data, (ValueType) 13, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( data[ i ], 13 ); allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copy_host ) +TYPED_TEST( MemoryOperationsTest, copy_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -84,15 +85,15 @@ TYPED_TEST( ArrayOperationsTest, copy_host ) Allocator allocator; ValueType* data1 = allocator.allocate( ARRAY_TEST_SIZE ); ValueType* data2 = allocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, (ValueType) 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::copy< ValueType, ValueType >( data2, data1, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data1, (ValueType) 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::copy< ValueType, ValueType >( data2, data1, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( data1[ i ], data2[ i ]); allocator.deallocate( data1, ARRAY_TEST_SIZE ); allocator.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copyWithConversion_host ) +TYPED_TEST( MemoryOperationsTest, copyWithConversion_host ) { using Allocator1 = Allocators::Host< int >; using Allocator2 = Allocators::Host< float >; @@ -101,15 +102,15 @@ TYPED_TEST( 
ArrayOperationsTest, copyWithConversion_host ) Allocator2 allocator2; int* data1 = allocator1.allocate( ARRAY_TEST_SIZE ); float* data2 = allocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::copy< float, int >( data2, data1, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data1, 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::copy< float, int >( data2, data1, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( data1[ i ], data2[ i ] ); allocator1.deallocate( data1, ARRAY_TEST_SIZE ); allocator2.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compare_host ) +TYPED_TEST( MemoryOperationsTest, compare_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -117,16 +118,16 @@ TYPED_TEST( ArrayOperationsTest, compare_host ) Allocator allocator; ValueType* data1 = allocator.allocate( ARRAY_TEST_SIZE ); ValueType* data2 = allocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, (ValueType) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data2, (ValueType) 0, ARRAY_TEST_SIZE ); - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); - ArrayOperations< Devices::Host >::set( data2, (ValueType) 7, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data1, (ValueType) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data2, (ValueType) 0, ARRAY_TEST_SIZE ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data2, (ValueType) 7, ARRAY_TEST_SIZE ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host 
>::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); allocator.deallocate( data1, ARRAY_TEST_SIZE ); allocator.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compareWithConversion_host ) +TYPED_TEST( MemoryOperationsTest, compareWithConversion_host ) { using Allocator1 = Allocators::Host< int >; using Allocator2 = Allocators::Host< float >; @@ -135,16 +136,16 @@ TYPED_TEST( ArrayOperationsTest, compareWithConversion_host ) Allocator2 allocator2; int* data1 = allocator1.allocate( ARRAY_TEST_SIZE ); float* data2 = allocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data2, (float) 0.0, ARRAY_TEST_SIZE ); - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); - ArrayOperations< Devices::Host >::set( data2, (float) 7.0, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data1, 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data2, (float) 0.0, ARRAY_TEST_SIZE ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data2, (float) 7.0, ARRAY_TEST_SIZE ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); allocator1.deallocate( data1, ARRAY_TEST_SIZE ); allocator2.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsValue_host ) +TYPED_TEST( MemoryOperationsTest, containsValue_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -155,14 +156,14 @@ TYPED_TEST( ArrayOperationsTest, containsValue_host ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) data[ i ] = i % 10; for( int i = 0; i < 10; i++ ) - 
EXPECT_TRUE( ( ArrayOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 10; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host ) +TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -173,18 +174,18 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) data[ i ] = i % 10; for( int i = 0; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) data[ i ] = 10; - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); allocator.deallocate( data, ARRAY_TEST_SIZE ); } #ifdef HAVE_CUDA -TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda ) +TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Cuda< ValueType >; @@ -198,7 +199,7 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda ) ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); } -TYPED_TEST( ArrayOperationsTest, setElement_cuda ) +TYPED_TEST( MemoryOperationsTest, setElement_cuda ) { using ValueType = typename TestFixture::ValueType; 
using Allocator = Allocators::Cuda< ValueType >; @@ -208,21 +209,21 @@ TYPED_TEST( ArrayOperationsTest, setElement_cuda ) ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) - ArrayOperations< Devices::Cuda >::setElement( &data[ i ], (ValueType) i ); + MemoryOperations< Devices::Cuda >::setElement( &data[ i ], (ValueType) i ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) { ValueType d; ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( ValueType ), cudaMemcpyDeviceToHost ), cudaSuccess ); EXPECT_EQ( d, i ); - EXPECT_EQ( ArrayOperations< Devices::Cuda >::getElement( &data[ i ] ), i ); + EXPECT_EQ( MemoryOperations< Devices::Cuda >::getElement( &data[ i ] ), i ); } allocator.deallocate( data, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); } -TYPED_TEST( ArrayOperationsTest, set_cuda ) +TYPED_TEST( MemoryOperationsTest, set_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -232,10 +233,10 @@ TYPED_TEST( ArrayOperationsTest, set_cuda ) CudaAllocator cudaAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) EXPECT_EQ( hostData[ i ], 13 ); @@ -243,7 +244,7 @@ 
TYPED_TEST( ArrayOperationsTest, set_cuda ) cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copy_cuda ) +TYPED_TEST( MemoryOperationsTest, copy_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -255,18 +256,18 @@ TYPED_TEST( ArrayOperationsTest, copy_cuda ) ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = cudaAllocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) ); + MemoryOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda 
) +TYPED_TEST( MemoryOperationsTest, copyWithConversions_cuda ) { using HostAllocator1 = Allocators::Host< int >; using HostAllocator2 = Allocators::Host< double >; @@ -281,10 +282,10 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda ) double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE ); long* deviceData = cudaAllocator1.allocate( ARRAY_TEST_SIZE ); float* deviceData2 = cudaAllocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( hostData[ i ], hostData2[ i ] ); hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE ); @@ -293,7 +294,7 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda ) cudaAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compare_cuda ) +TYPED_TEST( MemoryOperationsTest, compare_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -305,25 +306,25 @@ TYPED_TEST( ArrayOperationsTest, compare_cuda ) ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = cudaAllocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host 
>::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE ); - EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE ); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MemoryOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); - ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE ); - EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Cuda >::set( deviceData, 
(ValueType) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE ); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MemoryOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compareWithConversions_cuda ) +TYPED_TEST( MemoryOperationsTest, compareWithConversions_cuda ) { using HostAllocator = Allocators::Host< int >; using CudaAllocator1 = Allocators::Cuda< float >; @@ -336,25 +337,25 @@ TYPED_TEST( ArrayOperationsTest, compareWithConversions_cuda ) float* deviceData = cudaAllocator1.allocate( ARRAY_TEST_SIZE ); double* deviceData2 = cudaAllocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData, (float) 8, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE ); - EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData, (float) 8, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda 
>::set( deviceData2, (double) 9, ARRAY_TEST_SIZE ); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MemoryOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); - ArrayOperations< Devices::Cuda >::set( deviceData, (float) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE ); - EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Cuda >::set( deviceData, (float) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE ); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MemoryOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE ); cudaAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsValue_cuda ) +TYPED_TEST( MemoryOperationsTest, containsValue_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ 
-367,18 +368,18 @@ TYPED_TEST( ArrayOperationsTest, containsValue_cuda ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 10; i++ ) - EXPECT_TRUE( ( ArrayOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 10; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) +TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -391,16 +392,16 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = 10; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, 
ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); -- GitLab From 399f9627187682cef448c2adde1ecebfff22bad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Sep 2019 23:20:45 +0200 Subject: [PATCH 19/35] Moved algorithms from TNL/Containers/Algorithms/ to just TNL/Algorithms/ The usage of algorithms such as MemoryOperations or Reduction is not bound to a particular container. On the other hand, ArrayIO, ArrayAssignment, VectorAssignment and StaticArrayAssignment are just implementation details for the containers - moved into TNL/Containers/detail/ Also moved ParallelFor, StaticFor, StaticVectorFor, TemplateStaticFor into TNL/Algorithms/ --- .../ReductionAndScan/ComparisonExample.cpp | 4 +- .../ReductionAndScan/ExclusiveScanExample.cpp | 2 +- .../ReductionAndScan/MapReduceExample-1.cpp | 4 +- .../ReductionAndScan/MapReduceExample-2.cpp | 4 +- .../ReductionAndScan/MapReduceExample-3.cpp | 4 +- .../ReductionAndScan/MaximumNormExample.cpp | 4 +- .../ReductionAndScan/ProductExample.cpp | 4 +- .../ReductionWithArgument.cpp | 4 +- .../ReductionAndScan/ScalarProductExample.cpp | 4 +- .../ReductionAndScan/ScanExample.cpp | 2 +- .../ReductionAndScan/SegmentedScanExample.cpp | 2 +- .../Tutorials/ReductionAndScan/SumExample.cpp | 4 +- .../UpdateAndResidueExample.cpp | 4 +- .../BLAS/CommonVectorOperations.hpp | 36 +++++------ src/Benchmarks/BLAS/VectorOperations.h | 10 +-- src/Benchmarks/BLAS/triad.h | 8 +-- src/Benchmarks/BLAS/vector-operations.h | 4 +- .../NDArray/tnl-benchmark-ndarray.h | 4 +- 
src/Benchmarks/ODESolvers/SimpleProblem.h | 4 +- .../Traversers/GridTraversersBenchmark.h | 2 +- .../Traversers/GridTraversersBenchmark_1D.h | 2 +- .../Traversers/GridTraversersBenchmark_2D.h | 2 +- .../Traversers/GridTraversersBenchmark_3D.h | 2 +- .../Traversers/tnl-benchmark-traversers.h | 2 +- .../Algorithms/CudaMultireductionKernel.h | 4 +- .../Algorithms/CudaReductionBuffer.h | 2 - .../Algorithms/CudaReductionKernel.h | 6 +- .../Algorithms/CudaScanKernel.h | 4 +- .../Algorithms/DistributedScan.h | 6 +- .../Algorithms/MemoryOperations.h | 8 +-- .../Algorithms/MemoryOperationsCuda.hpp | 10 ++- .../Algorithms/MemoryOperationsHost.hpp | 8 +-- .../Algorithms/MemoryOperationsSequential.hpp | 4 +- .../Algorithms/MultiDeviceMemoryOperations.h | 4 +- .../Algorithms/Multireduction.h | 2 - .../Algorithms/Multireduction.hpp | 8 +-- src/TNL/{ => Algorithms}/ParallelFor.h | 4 +- .../{Containers => }/Algorithms/Reduction.h | 4 +- .../{Containers => }/Algorithms/Reduction.hpp | 8 +-- src/TNL/{Containers => }/Algorithms/Scan.h | 62 +++++++++---------- src/TNL/{Containers => }/Algorithms/Scan.hpp | 10 ++- src/TNL/{ => Algorithms}/StaticFor.h | 2 + src/TNL/{ => Algorithms}/StaticVectorFor.h | 2 + src/TNL/{ => Algorithms}/TemplateStaticFor.h | 2 + src/TNL/Containers/Array.hpp | 16 +++-- src/TNL/Containers/ArrayView.hpp | 18 +++--- src/TNL/Containers/DistributedArray.hpp | 4 +- src/TNL/Containers/DistributedArrayView.hpp | 2 +- src/TNL/Containers/DistributedNDArray.h | 2 +- .../DistributedNDArraySynchronizer.h | 8 +-- src/TNL/Containers/DistributedVector.hpp | 2 +- src/TNL/Containers/DistributedVectorView.hpp | 2 +- src/TNL/Containers/Expressions/Comparison.h | 4 +- .../Expressions/DistributedComparison.h | 1 - .../DistributedExpressionTemplates.h | 12 ++-- .../Expressions/ExpressionTemplates.h | 12 ++-- .../Expressions/VerticalOperations.h | 2 +- .../Multimaps/MultimapPermutationApplier.h | 20 +++--- src/TNL/Containers/NDArrayView.h | 4 +- src/TNL/Containers/StaticArray.hpp | 20 
+++--- src/TNL/Containers/StaticVector.hpp | 18 +++--- src/TNL/Containers/Vector.hpp | 12 ++-- src/TNL/Containers/VectorView.h | 2 +- src/TNL/Containers/VectorView.hpp | 12 ++-- .../{Algorithms => detail}/ArrayAssignment.h | 12 ++-- .../{Algorithms => detail}/ArrayIO.h | 4 +- .../StaticArrayAssignment.h | 48 +++++++------- .../{Algorithms => detail}/VectorAssignment.h | 26 ++++---- .../Containers/ndarray/BoundaryExecutors.h | 26 ++++---- src/TNL/Containers/ndarray/Executors.h | 14 ++--- src/TNL/Containers/ndarray/SizesHolder.h | 14 ++--- .../Containers/ndarray/SizesHolderHelpers.h | 4 +- src/TNL/Functions/CutMeshFunction.h | 4 +- src/TNL/Matrices/BiEllpack_impl.h | 2 +- src/TNL/Matrices/CSR_impl.h | 2 +- src/TNL/Matrices/DistributedSpMV.h | 32 +++++----- src/TNL/Matrices/MatrixOperations.h | 2 +- .../Matrices/SlicedEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/SlicedEllpack_impl.h | 2 +- src/TNL/Matrices/SparseOperations_impl.h | 12 ++-- .../DistributedMeshes/BufferEntitiesHelper.h | 8 +-- .../DistributedMeshes/CopyEntitiesHelper.h | 8 +-- .../NeighborGridEntityGetter1D_impl.h | 4 +- .../NeighborGridEntityGetter2D_impl.h | 6 +- .../NeighborGridEntityGetter3D_impl.h | 12 ++-- .../MeshDetails/EntityStorageRebinder.h | 24 +++---- .../MeshDetails/IndexPermutationApplier.h | 26 ++++---- .../MeshLayers/BoundaryTags/Initializer.h | 18 +++--- .../initializer/EntityInitializer.h | 2 - .../initializer/SubentitySeedsCreator.h | 6 +- src/TNL/Meshes/Writers/VTKWriter_impl.h | 4 +- src/TNL/Solvers/Linear/GMRES_impl.h | 12 ++-- .../Linear/Preconditioners/Diagonal_impl.h | 10 +-- .../Linear/Preconditioners/ILU0_impl.h | 6 +- src/UnitTests/Algorithms/CMakeLists.txt | 29 +++++++++ .../MemoryOperationsTest.cpp | 0 .../MemoryOperationsTest.cu | 0 .../MemoryOperationsTest.h | 7 +-- .../MultireductionTest.cpp | 0 .../MultireductionTest.cu | 0 .../MultireductionTest.h | 4 +- .../{ => Algorithms}/ParallelForTest.cpp | 0 .../{ => Algorithms}/ParallelForTest.cu | 0 .../{ => 
Algorithms}/ParallelForTest.h | 28 ++++----- src/UnitTests/AllocatorsTest.h | 6 +- src/UnitTests/CMakeLists.txt | 10 --- src/UnitTests/Containers/CMakeLists.txt | 22 ------- .../ndarray/DistributedNDArray_1D_test.h | 4 +- .../ndarray/DistributedNDArray_semi1D_test.h | 2 +- .../ndarray/StaticNDArrayCudaTest.cu | 7 ++- 110 files changed, 446 insertions(+), 480 deletions(-) rename src/TNL/{Containers => }/Algorithms/CudaMultireductionKernel.h (99%) rename src/TNL/{Containers => }/Algorithms/CudaReductionBuffer.h (97%) rename src/TNL/{Containers => }/Algorithms/CudaReductionKernel.h (99%) rename src/TNL/{Containers => }/Algorithms/CudaScanKernel.h (99%) rename src/TNL/{Containers => }/Algorithms/DistributedScan.h (94%) rename src/TNL/{Containers => }/Algorithms/MemoryOperations.h (96%) rename src/TNL/{Containers => }/Algorithms/MemoryOperationsCuda.hpp (95%) rename src/TNL/{Containers => }/Algorithms/MemoryOperationsHost.hpp (96%) rename src/TNL/{Containers => }/Algorithms/MemoryOperationsSequential.hpp (96%) rename src/TNL/{Containers => }/Algorithms/MultiDeviceMemoryOperations.h (98%) rename src/TNL/{Containers => }/Algorithms/Multireduction.h (98%) rename src/TNL/{Containers => }/Algorithms/Multireduction.hpp (97%) rename src/TNL/{ => Algorithms}/ParallelFor.h (99%) rename src/TNL/{Containers => }/Algorithms/Reduction.h (98%) rename src/TNL/{Containers => }/Algorithms/Reduction.hpp (98%) rename src/TNL/{Containers => }/Algorithms/Scan.h (97%) rename src/TNL/{Containers => }/Algorithms/Scan.hpp (97%) rename src/TNL/{ => Algorithms}/StaticFor.h (97%) rename src/TNL/{ => Algorithms}/StaticVectorFor.h (97%) rename src/TNL/{ => Algorithms}/TemplateStaticFor.h (98%) rename src/TNL/Containers/{Algorithms => detail}/ArrayAssignment.h (85%) rename src/TNL/Containers/{Algorithms => detail}/ArrayIO.h (98%) rename src/TNL/Containers/{Algorithms => detail}/StaticArrayAssignment.h (63%) rename src/TNL/Containers/{Algorithms => detail}/VectorAssignment.h (91%) create mode 100644 
src/UnitTests/Algorithms/CMakeLists.txt rename src/UnitTests/{Containers => Algorithms}/MemoryOperationsTest.cpp (100%) rename src/UnitTests/{Containers => Algorithms}/MemoryOperationsTest.cu (100%) rename src/UnitTests/{Containers => Algorithms}/MemoryOperationsTest.h (98%) rename src/UnitTests/{Containers => Algorithms}/MultireductionTest.cpp (100%) rename src/UnitTests/{Containers => Algorithms}/MultireductionTest.cu (100%) rename src/UnitTests/{Containers => Algorithms}/MultireductionTest.h (97%) rename src/UnitTests/{ => Algorithms}/ParallelForTest.cpp (100%) rename src/UnitTests/{ => Algorithms}/ParallelForTest.cu (100%) rename src/UnitTests/{ => Algorithms}/ParallelForTest.h (86%) diff --git a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp index d0b66adb4..4c3a17268 100644 --- a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp index 9ccb5baa8..29817aa14 100644 --- a/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp @@ -4,7 +4,7 @@ using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > void scan( Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp 
b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp index 8d21107fd..2fb766238 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double mapReduce( Vector< double, Device >& u ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp index f44cac918..10fb0b499 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp @@ -1,12 +1,12 @@ #include #include #include -#include +#include #include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double mapReduce( Vector< double, Device >& u ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp index 1125b605c..de8c4bab6 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp @@ -1,12 +1,12 @@ #include #include #include -#include +#include #include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double mapReduce( Vector< double, Device >& u ) diff --git a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp index 8e4dbc740..ca4b8c8a4 100644 --- a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp +++ 
b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double maximumNorm( const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp index 8be11efa3..e2691e40a 100644 --- a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double product( const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp index e37a21b6e..000af86fe 100644 --- a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > std::pair< int, double > diff --git a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp index 5bfd759ed..c072e09ba 100644 --- a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; 
+using namespace TNL::Algorithms; template< typename Device > double scalarProduct( const Vector< double, Device >& u, const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp index 38f44ccdd..3dbd8581d 100644 --- a/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp @@ -4,7 +4,7 @@ using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > void scan( Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp index b3f677639..5e1379f5d 100644 --- a/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp @@ -4,7 +4,7 @@ using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags ) diff --git a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp index 3be04bd92..3cf648a57 100644 --- a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double sum( const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp index bf93cd58b..4e44abe57 100644 --- 
a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp @@ -1,11 +1,11 @@ #include #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double updateAndResidue( Vector< double, Device >& u, const Vector< double, Device >& delta_u, const double& tau ) diff --git a/src/Benchmarks/BLAS/CommonVectorOperations.hpp b/src/Benchmarks/BLAS/CommonVectorOperations.hpp index 640fda337..13a0f6322 100644 --- a/src/Benchmarks/BLAS/CommonVectorOperations.hpp +++ b/src/Benchmarks/BLAS/CommonVectorOperations.hpp @@ -10,7 +10,7 @@ #pragma once -#include +#include #include "CommonVectorOperations.h" namespace TNL { @@ -30,7 +30,7 @@ getVectorMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -47,7 +47,7 @@ getVectorMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< 
typename Device > @@ -64,7 +64,7 @@ getVectorAbsMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -81,7 +81,7 @@ getVectorAbsMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -97,7 +97,7 @@ getVectorL1Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -113,7 +113,7 @@ getVectorL2Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; - return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( 
v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > @@ -136,7 +136,7 @@ getVectorLpNorm( const Vector& v, const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; - return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -155,7 +155,7 @@ getVectorSum( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -175,7 +175,7 @@ getVectorDifferenceMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -195,7 +195,7 @@ getVectorDifferenceMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), 
reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -215,7 +215,7 @@ getVectorDifferenceAbsMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -235,7 +235,7 @@ getVectorDifferenceAbsMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -254,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -276,7 +276,7 @@ 
getVectorDifferenceL2Norm( const Vector1& v1, auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; - return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > @@ -302,7 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; - return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -321,7 +321,7 @@ getVectorDifferenceSum( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -340,7 +340,7 @@ getScalarProduct( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } } // namespace Benchmarks diff --git a/src/Benchmarks/BLAS/VectorOperations.h b/src/Benchmarks/BLAS/VectorOperations.h index 
0ad2c1ee6..4c9ad6cc5 100644 --- a/src/Benchmarks/BLAS/VectorOperations.h +++ b/src/Benchmarks/BLAS/VectorOperations.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Benchmarks { @@ -104,9 +104,9 @@ struct VectorOperations< Devices::Cuda > auto add2 = [=] __cuda_callable__ ( IndexType i ) { y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; }; if( thisMultiplicator == 1.0 ) - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add1 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add1 ); else - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add2 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add2 ); } template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > @@ -131,9 +131,9 @@ struct VectorOperations< Devices::Cuda > auto add2 = [=] __cuda_callable__ ( IndexType i ) { v[ i ] = thisMultiplicator * v[ i ] + multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; }; if( thisMultiplicator == 1.0 ) - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add1 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add1 ); else - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add2 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add2 ); } }; diff --git a/src/Benchmarks/BLAS/triad.h b/src/Benchmarks/BLAS/triad.h index c107944c8..3ac747fba 100644 --- a/src/Benchmarks/BLAS/triad.h +++ b/src/Benchmarks/BLAS/triad.h @@ -73,7 +73,7 @@ benchmarkTriad( Benchmark & benchmark, { a_v[i] = b_v[i] + scalar * c_v[i]; }; - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); a_h = a_d; }; @@ -117,7 +117,7 @@ benchmarkTriad( Benchmark & benchmark, { a_v[i] = b_v[i] + scalar * c_v[i]; }; - ParallelFor< Devices::Cuda >::exec( 
(long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); a_h = a_d; }; @@ -150,7 +150,7 @@ benchmarkTriad( Benchmark & benchmark, }; auto triad = [&]() { - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); }; benchmark.time< Devices::Cuda >( reset, "zero-copy", triad ); @@ -181,7 +181,7 @@ benchmarkTriad( Benchmark & benchmark, }; auto triad = [&]() { - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); }; benchmark.time< Devices::Cuda >( reset, "unified memory", triad ); diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index ce2114f31..5f5cd989f 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -578,13 +578,13 @@ benchmarkVectorOperations( Benchmark & benchmark, //// // Exclusive prefix sum auto exclusivePrefixSumHost = [&]() { - hostVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + hostVector.template prefixSum< Algorithms::ScanType::Exclusive >(); }; benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); #ifdef HAVE_CUDA auto exclusivePrefixSumCuda = [&]() { - deviceVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + deviceVector.template prefixSum< Algorithms::ScanType::Exclusive >(); }; benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); #endif diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h index 0de53ea88..d8865a40a 100644 --- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h +++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include @@ -98,7 +98,7 @@ void 
benchmark_array( Benchmark& benchmark, index_type size = 500000000 ) }; auto f = [&]() { - TNL::ParallelFor< Device >::exec( 0, (int) size, kernel, a.getData(), b.getData() ); + Algorithms::ParallelFor< Device >::exec( 0, (int) size, kernel, a.getData(), b.getData() ); }; // warm-up for all benchmarks diff --git a/src/Benchmarks/ODESolvers/SimpleProblem.h b/src/Benchmarks/ODESolvers/SimpleProblem.h index 6323264b8..ff81fd18e 100644 --- a/src/Benchmarks/ODESolvers/SimpleProblem.h +++ b/src/Benchmarks/ODESolvers/SimpleProblem.h @@ -13,7 +13,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Benchmarks { @@ -43,7 +43,7 @@ struct SimpleProblem { fu[ i ] = 1.0; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, u.getSize(), computeF, u, fu ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, u.getSize(), computeF, u, fu ); } template< typename Vector > diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h index 72ca102bc..01590f122 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h index 8ec5cdf88..9820af392 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h index 3c2037f40..0e9ae7f2f 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include #include diff --git 
a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h index 9dfeadb05..26b6413e4 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h index 63b3cc8c9..dbe637d82 100644 --- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h +++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include using namespace TNL; diff --git a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h b/src/TNL/Algorithms/CudaMultireductionKernel.h similarity index 99% rename from src/TNL/Containers/Algorithms/CudaMultireductionKernel.h rename to src/TNL/Algorithms/CudaMultireductionKernel.h index c97e0a8aa..6a0785647 100644 --- a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h +++ b/src/TNL/Algorithms/CudaMultireductionKernel.h @@ -16,11 +16,10 @@ #include #include #include -#include +#include #include namespace TNL { -namespace Containers { namespace Algorithms { #ifdef HAVE_CUDA @@ -282,5 +281,4 @@ CudaMultireductionKernelLauncher( const Result zero, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h b/src/TNL/Algorithms/CudaReductionBuffer.h similarity index 97% rename from src/TNL/Containers/Algorithms/CudaReductionBuffer.h rename to src/TNL/Algorithms/CudaReductionBuffer.h index f873d7815..af9b3fcc2 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h +++ b/src/TNL/Algorithms/CudaReductionBuffer.h @@ -19,7 +19,6 @@ #include namespace TNL { -namespace Containers { namespace Algorithms { class CudaReductionBuffer @@ -92,5 +91,4 @@ class CudaReductionBuffer }; } // namespace 
Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h similarity index 99% rename from src/TNL/Containers/Algorithms/CudaReductionKernel.h rename to src/TNL/Algorithms/CudaReductionKernel.h index 2a959cb9c..c1004a374 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Algorithms/CudaReductionKernel.h @@ -16,12 +16,11 @@ #include #include #include -#include -#include +#include +#include #include namespace TNL { -namespace Containers { namespace Algorithms { /**** @@ -615,5 +614,4 @@ struct CudaReductionKernelLauncher }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h similarity index 99% rename from src/TNL/Containers/Algorithms/CudaScanKernel.h rename to src/TNL/Algorithms/CudaScanKernel.h index 5b2016439..79a201959 100644 --- a/src/TNL/Containers/Algorithms/CudaScanKernel.h +++ b/src/TNL/Algorithms/CudaScanKernel.h @@ -18,7 +18,6 @@ #include namespace TNL { -namespace Containers { namespace Algorithms { #ifdef HAVE_CUDA @@ -249,7 +248,7 @@ struct CudaScanKernelLauncher //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; // allocate array for the block sums - Array< Real, Devices::Cuda > blockSums; + Containers::Array< Real, Devices::Cuda > blockSums; blockSums.setSize( numberOfBlocks ); // loop over all grids @@ -388,5 +387,4 @@ struct CudaScanKernelLauncher #endif } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/DistributedScan.h b/src/TNL/Algorithms/DistributedScan.h similarity index 94% rename from src/TNL/Containers/Algorithms/DistributedScan.h rename to src/TNL/Algorithms/DistributedScan.h index 44fd425b9..f294b0cf3 100644 --- a/src/TNL/Containers/Algorithms/DistributedScan.h +++ b/src/TNL/Algorithms/DistributedScan.h @@ 
-12,11 +12,10 @@ #pragma once -#include +#include #include namespace TNL { -namespace Containers { namespace Algorithms { template< ScanType Type > @@ -51,7 +50,7 @@ struct DistributedScan const int nproc = CommunicatorType::GetSize( group ); RealType dataForScatter[ nproc ]; for( int i = 0; i < nproc; i++ ) dataForScatter[ i ] = localSum; - Vector< RealType, Devices::Host > rankSums( nproc ); + Containers::Vector< RealType, Devices::Host > rankSums( nproc ); // NOTE: exchanging general data types does not work with MPI CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); @@ -66,5 +65,4 @@ struct DistributedScan }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/MemoryOperations.h b/src/TNL/Algorithms/MemoryOperations.h similarity index 96% rename from src/TNL/Containers/Algorithms/MemoryOperations.h rename to src/TNL/Algorithms/MemoryOperations.h index de588484f..cdbdb7909 100644 --- a/src/TNL/Containers/Algorithms/MemoryOperations.h +++ b/src/TNL/Algorithms/MemoryOperations.h @@ -15,7 +15,6 @@ #include namespace TNL { -namespace Containers { namespace Algorithms { template< typename DestinationExecution > @@ -180,9 +179,8 @@ struct MemoryOperations< Devices::Cuda > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL -#include -#include -#include +#include +#include +#include diff --git a/src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp similarity index 95% rename from src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp rename to src/TNL/Algorithms/MemoryOperationsCuda.hpp index a504b5b76..a823f0ecb 100644 --- a/src/TNL/Containers/Algorithms/MemoryOperationsCuda.hpp +++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp @@ -14,14 +14,13 @@ #include // std::unique_ptr #include -#include -#include -#include -#include +#include +#include +#include +#include #include namespace TNL { -namespace Containers 
{ namespace Algorithms { template< typename Element > @@ -156,5 +155,4 @@ containsOnlyValue( const Element* data, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/MemoryOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp similarity index 96% rename from src/TNL/Containers/Algorithms/MemoryOperationsHost.hpp rename to src/TNL/Algorithms/MemoryOperationsHost.hpp index 80be4cc3d..a88688685 100644 --- a/src/TNL/Containers/Algorithms/MemoryOperationsHost.hpp +++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp @@ -14,12 +14,11 @@ #include #include // std::copy, std::equal -#include -#include -#include +#include +#include +#include namespace TNL { -namespace Containers { namespace Algorithms { template< typename Element > @@ -165,5 +164,4 @@ containsOnlyValue( const Element* data, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/MemoryOperationsSequential.hpp b/src/TNL/Algorithms/MemoryOperationsSequential.hpp similarity index 96% rename from src/TNL/Containers/Algorithms/MemoryOperationsSequential.hpp rename to src/TNL/Algorithms/MemoryOperationsSequential.hpp index 17d73cf78..e427f00dd 100644 --- a/src/TNL/Containers/Algorithms/MemoryOperationsSequential.hpp +++ b/src/TNL/Algorithms/MemoryOperationsSequential.hpp @@ -10,10 +10,9 @@ #pragma once -#include +#include namespace TNL { -namespace Containers { namespace Algorithms { template< typename Element > @@ -133,5 +132,4 @@ containsOnlyValue( const Element* data, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h similarity index 98% rename from src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h rename to src/TNL/Algorithms/MultiDeviceMemoryOperations.h index 4809cae0d..c0e75f2fa 100644 --- 
a/src/TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h +++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h @@ -10,10 +10,9 @@ #pragma once -#include +#include namespace TNL { -namespace Containers { namespace Algorithms { template< typename DestinationDevice, @@ -273,5 +272,4 @@ compare( const Element1* hostData, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Multireduction.h b/src/TNL/Algorithms/Multireduction.h similarity index 98% rename from src/TNL/Containers/Algorithms/Multireduction.h rename to src/TNL/Algorithms/Multireduction.h index 9802a2953..ac67255fe 100644 --- a/src/TNL/Containers/Algorithms/Multireduction.h +++ b/src/TNL/Algorithms/Multireduction.h @@ -18,7 +18,6 @@ #include namespace TNL { -namespace Containers { namespace Algorithms { template< typename Device > @@ -83,7 +82,6 @@ struct Multireduction< Devices::Cuda > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL #include "Multireduction.hpp" diff --git a/src/TNL/Containers/Algorithms/Multireduction.hpp b/src/TNL/Algorithms/Multireduction.hpp similarity index 97% rename from src/TNL/Containers/Algorithms/Multireduction.hpp rename to src/TNL/Algorithms/Multireduction.hpp index c80f68f32..25b91f026 100644 --- a/src/TNL/Containers/Algorithms/Multireduction.hpp +++ b/src/TNL/Algorithms/Multireduction.hpp @@ -17,9 +17,9 @@ //#define CUDA_REDUCTION_PROFILING #include -#include -#include -#include +#include +#include +#include #ifdef CUDA_REDUCTION_PROFILING #include @@ -27,7 +27,6 @@ #endif namespace TNL { -namespace Containers { namespace Algorithms { template< typename Result, @@ -225,5 +224,4 @@ reduce( const Result zero, }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/ParallelFor.h b/src/TNL/Algorithms/ParallelFor.h similarity index 99% rename from src/TNL/ParallelFor.h rename to src/TNL/Algorithms/ParallelFor.h index cc9ce7080..20e87f222 100644 
--- a/src/TNL/ParallelFor.h +++ b/src/TNL/Algorithms/ParallelFor.h @@ -29,9 +29,10 @@ */ namespace TNL { +namespace Algorithms { enum ParallelForMode { SynchronousMode, AsynchronousMode }; - + template< typename Device = Devices::Host, ParallelForMode Mode = SynchronousMode > struct ParallelFor @@ -385,4 +386,5 @@ struct ParallelFor3D< Devices::Cuda, Mode > } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Algorithms/Reduction.h similarity index 98% rename from src/TNL/Containers/Algorithms/Reduction.h rename to src/TNL/Algorithms/Reduction.h index 83cedb01f..e77fa1206 100644 --- a/src/TNL/Containers/Algorithms/Reduction.h +++ b/src/TNL/Algorithms/Reduction.h @@ -19,7 +19,6 @@ #include namespace TNL { -namespace Containers { namespace Algorithms { /** @@ -236,7 +235,6 @@ struct Reduction< Devices::Cuda > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL -#include +#include diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Algorithms/Reduction.hpp similarity index 98% rename from src/TNL/Containers/Algorithms/Reduction.hpp rename to src/TNL/Algorithms/Reduction.hpp index 19ed3e6af..9fd56576e 100644 --- a/src/TNL/Containers/Algorithms/Reduction.hpp +++ b/src/TNL/Algorithms/Reduction.hpp @@ -16,9 +16,9 @@ //#define CUDA_REDUCTION_PROFILING -#include -#include -#include +#include +#include +#include #ifdef CUDA_REDUCTION_PROFILING #include @@ -26,7 +26,6 @@ #endif namespace TNL { -namespace Containers { namespace Algorithms { /**** @@ -453,5 +452,4 @@ reduceWithArgument( const Index size, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Scan.h b/src/TNL/Algorithms/Scan.h similarity index 97% rename from src/TNL/Containers/Algorithms/Scan.h rename to src/TNL/Algorithms/Scan.h index 5587c6273..2f2275c53 100644 --- a/src/TNL/Containers/Algorithms/Scan.h +++ b/src/TNL/Algorithms/Scan.h @@ -16,13 
+16,12 @@ #include namespace TNL { -namespace Containers { namespace Algorithms { /** * \brief Scan (or prefix sum) type - inclusive or exclusive. - * - * See \ref TNL::Containers::Algorithms::Scan + * + * See \ref TNL::Algorithms::Scan. */ enum class ScanType { Exclusive, @@ -31,22 +30,22 @@ enum class ScanType { /** * \brief Computes scan (or prefix sum) on a vector. - * - * [Scan (or prefix sum)](https://en.wikipedia.org/wiki/Prefix_sum) operation turns a sequence + * + * [Scan (or prefix sum)](https://en.wikipedia.org/wiki/Prefix_sum) operation turns a sequence * \f$a_1, \ldots, a_n\f$ into a sequence \f$s_1, \ldots, s_n\f$ defined as - * + * * \f[ * s_i = \sum_{j=1}^i a_i. * \f] * Exclusive scan (or prefix sum) is defined as - * + * * \f[ * \sigma_i = \sum_{j=1}^{i-1} a_i. * \f] - * + * * \tparam Device parameter says on what device the reduction is gonna be performed. * \tparam Type parameter says if inclusive or exclusive is scan is to be computed. - * + * * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >. */ template< typename Device, @@ -55,41 +54,41 @@ struct Scan; /** * \brief Computes segmented scan (or prefix sum) on a vector. - * + * * Segmented scan is a modification of common scan. In this case the sequence of * numbers in hand is divided into segments like this, for example - * + * * ``` * [1,3,5][2,4,6,9][3,5],[3,6,9,12,15] * ``` - * + * * and we want to compute inclusive or exclusive scan of each segment. For inclusive segmented prefix sum we get - * + * * ``` * [1,4,9][2,6,12,21][3,8][3,9,18,30,45] * ``` - * + * * and for exclusive segmented prefix sum it is - * + * * ``` * [0,1,4][0,2,6,12][0,3][0,3,9,18,30] * ``` - * + * * In addition to common scan, we need to encode the segments of the input sequence. * It is done by auxiliary flags array (it can be array of booleans) having `1` at the * beginning of each segment and `0` on all other positions. 
In our example, it would be like this: - * + * * ``` * [1,0,0,1,0,0,0,1,0,1,0,0, 0, 0] * [1,3,5,2,4,6,9,3,5,3,6,9,12,15] - * + * * ``` - * + * * \tparam Device parameter says on what device the reduction is gonna be performed. * \tparam Type parameter says if inclusive or exclusive is scan is to be computed. - * + * * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >. - * + * * **Note: Segmented scan is not implemented for CUDA yet.** */ template< typename Device, @@ -102,10 +101,10 @@ struct Scan< Devices::Host, Type > { /** * \brief Computes scan (prefix sum) on CPU. - * + * * \tparam Vector type vector being used for the scan. * \tparam Reduction lambda function defining the reduction operation - * + * * \param v input vector, the result of scan is stored in the same vector * \param begin the first element in the array to be scanned * \param end the last element in the array to be scanned @@ -162,10 +161,10 @@ struct Scan< Devices::Cuda, Type > { /** * \brief Computes scan (prefix sum) on GPU. - * + * * \tparam Vector type vector being used for the scan. * \tparam Reduction lambda function defining the reduction operation - * + * * \param v input vector, the result of scan is stored in the same vector * \param begin the first element in the array to be scanned * \param end the last element in the array to be scanned @@ -222,11 +221,11 @@ struct SegmentedScan< Devices::Host, Type > { /** * \brief Computes segmented scan (prefix sum) on CPU. - * + * * \tparam Vector type vector being used for the scan. 
* \tparam Reduction lambda function defining the reduction operation * \tparam Flags array type containing zeros and ones defining the segments begining - * + * * \param v input vector, the result of scan is stored in the same vector * \param flags is an array with zeros and ones defining the segments begining * \param begin the first element in the array to be scanned @@ -266,11 +265,11 @@ struct SegmentedScan< Devices::Cuda, Type > { /** * \brief Computes segmented scan (prefix sum) on GPU. - * + * * \tparam Vector type vector being used for the scan. * \tparam Reduction lambda function defining the reduction operation * \tparam Flags array type containing zeros and ones defining the segments begining - * + * * \param v input vector, the result of scan is stored in the same vector * \param flags is an array with zeros and ones defining the segments begining * \param begin the first element in the array to be scanned @@ -292,7 +291,7 @@ struct SegmentedScan< Devices::Cuda, Type > * \par Output * * \include SegmentedScanExample.out - * + * * **Note: Segmented scan is not implemented for CUDA yet.** */ template< typename Vector, @@ -308,7 +307,6 @@ struct SegmentedScan< Devices::Cuda, Type > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL -#include +#include diff --git a/src/TNL/Containers/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp similarity index 97% rename from src/TNL/Containers/Algorithms/Scan.hpp rename to src/TNL/Algorithms/Scan.hpp index d7d2b1811..bb2288c6a 100644 --- a/src/TNL/Containers/Algorithms/Scan.hpp +++ b/src/TNL/Algorithms/Scan.hpp @@ -17,12 +17,11 @@ #include #include #include -#include +#include #include #include namespace TNL { -namespace Containers { namespace Algorithms { template< ScanType Type > @@ -61,7 +60,7 @@ performFirstPhase( Vector& v, #ifdef HAVE_OPENMP const int threads = Devices::Host::getMaxThreadsCount(); - Array< RealType, Devices::Host > block_sums( threads + 1 ); + Containers::Array< 
RealType, Devices::Host > block_sums( threads + 1 ); block_sums[ 0 ] = zero; #pragma omp parallel num_threads(threads) @@ -100,8 +99,8 @@ performFirstPhase( Vector& v, return block_sums; #else // FIXME: StaticArray does not have getElement() which is used in DistributedScan -// return StaticArray< 1, RealType > block_sums; - Array< RealType, Devices::Host > block_sums( 1 ); +// return Containers::StaticArray< 1, RealType > block_sums; + Containers::Array< RealType, Devices::Host > block_sums( 1 ); block_sums[ 0 ] = zero; if( Type == ScanType::Inclusive ) { @@ -303,5 +302,4 @@ perform( Vector& v, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/StaticFor.h b/src/TNL/Algorithms/StaticFor.h similarity index 97% rename from src/TNL/StaticFor.h rename to src/TNL/Algorithms/StaticFor.h index c37763aaa..c74045458 100644 --- a/src/TNL/StaticFor.h +++ b/src/TNL/Algorithms/StaticFor.h @@ -13,6 +13,7 @@ #include namespace TNL { +namespace Algorithms { // Manual unrolling does not make sense for loops with a large iterations // count. 
For a very large iterations count it would trigger the compiler's @@ -57,4 +58,5 @@ struct StaticFor< Begin, End, false > } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/StaticVectorFor.h b/src/TNL/Algorithms/StaticVectorFor.h similarity index 97% rename from src/TNL/StaticVectorFor.h rename to src/TNL/Algorithms/StaticVectorFor.h index 59af0fcb8..664f97aed 100644 --- a/src/TNL/StaticVectorFor.h +++ b/src/TNL/Algorithms/StaticVectorFor.h @@ -13,6 +13,7 @@ #include namespace TNL { +namespace Algorithms { struct StaticVectorFor { @@ -48,4 +49,5 @@ struct StaticVectorFor } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/TemplateStaticFor.h b/src/TNL/Algorithms/TemplateStaticFor.h similarity index 98% rename from src/TNL/TemplateStaticFor.h rename to src/TNL/Algorithms/TemplateStaticFor.h index efd9d1ad9..753ad9b26 100644 --- a/src/TNL/TemplateStaticFor.h +++ b/src/TNL/Algorithms/TemplateStaticFor.h @@ -16,6 +16,7 @@ #include namespace TNL { +namespace Algorithms { namespace detail { template< typename IndexType, @@ -89,4 +90,5 @@ struct TemplateStaticFor } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 7688ca194..ce36b27e9 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -16,10 +16,8 @@ #include #include #include -#include -#include -#include -#include +#include +#include #include "Array.h" @@ -186,7 +184,7 @@ String Array< Value, Device, Index, Allocator >:: getSerializationType() { - return Algorithms::ArrayIO< Value, Device, Index >::getSerializationType(); + return detail::ArrayIO< Value, Device, Index >::getSerializationType(); } template< typename Value, @@ -581,8 +579,8 @@ Array< Value, Device, Index, Allocator >& Array< Value, Device, Index, Allocator >:: operator=( const T& data ) { - Algorithms::ArrayAssignment< Array, T >::resize( *this, data ); - Algorithms::ArrayAssignment< Array, T >::assign( *this, 
data ); + detail::ArrayAssignment< Array, T >::resize( *this, data ); + detail::ArrayAssignment< Array, T >::assign( *this, data ); return *this; } @@ -761,7 +759,7 @@ std::ostream& operator<<( std::ostream& str, const Array< Value, Device, Index, template< typename Value, typename Device, typename Index, typename Allocator > File& operator<<( File& file, const Array< Value, Device, Index, Allocator >& array ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Device, Index >; saveObjectType( file, IO::getSerializationType() ); const Index size = array.getSize(); file.save( &size ); @@ -780,7 +778,7 @@ File& operator<<( File&& file, const Array< Value, Device, Index, Allocator >& a template< typename Value, typename Device, typename Index, typename Allocator > File& operator>>( File& file, Array< Value, Device, Index, Allocator >& array ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Device, Index >; const String type = getObjectType( file ); if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." 
); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 010a40c3a..b6915e4e3 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -14,11 +14,11 @@ #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "ArrayView.h" @@ -113,7 +113,7 @@ ArrayView< Value, Device, Index >& ArrayView< Value, Device, Index >:: operator=( const T& data ) { - Algorithms::ArrayAssignment< ArrayView, T >::assign( *this, data ); + detail::ArrayAssignment< ArrayView, T >::assign( *this, data ); return *this; } @@ -316,7 +316,7 @@ evaluate( const Function& f, const Index begin, Index end ) if( end == 0 ) end = this->getSize(); - ParallelFor< DeviceType >::exec( begin, end, eval ); + Algorithms::ParallelFor< DeviceType >::exec( begin, end, eval ); } template< typename Value, @@ -383,7 +383,7 @@ load( const String& fileName ) template< typename Value, typename Device, typename Index > File& operator<<( File& file, const ArrayView< Value, Device, Index > view ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Device, Index >; saveObjectType( file, IO::getSerializationType() ); const Index size = view.getSize(); file.save( &size ); @@ -402,7 +402,7 @@ File& operator<<( File&& file, const ArrayView< Value, Device, Index > view ) template< typename Value, typename Device, typename Index > File& operator>>( File& file, ArrayView< Value, Device, Index > view ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Device, Index >; const String type = getObjectType( file ); if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." 
); diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp index c0f7522ab..c146bbf9f 100644 --- a/src/TNL/Containers/DistributedArray.hpp +++ b/src/TNL/Containers/DistributedArray.hpp @@ -14,7 +14,7 @@ #include "DistributedArray.h" -#include +#include #include // important only when MPI is disabled namespace TNL { @@ -110,7 +110,7 @@ copyFromGlobal( ConstLocalViewType globalArray ) localView[ i ] = globalArray[ localRange.getGlobalIndex( i ) ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); } diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp index d92ae927d..0199229d4 100644 --- a/src/TNL/Containers/DistributedArrayView.hpp +++ b/src/TNL/Containers/DistributedArrayView.hpp @@ -180,7 +180,7 @@ copyFromGlobal( ConstLocalViewType globalArray ) localView[ i ] = globalArray[ localRange.getGlobalIndex( i ) ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); } diff --git a/src/TNL/Containers/DistributedNDArray.h b/src/TNL/Containers/DistributedNDArray.h index 4b123d114..57b94a34b 100644 --- a/src/TNL/Containers/DistributedNDArray.h +++ b/src/TNL/Containers/DistributedNDArray.h @@ -392,7 +392,7 @@ public: void allocate() { SizesHolderType localSizes; - TemplateStaticFor< std::size_t, 0, SizesHolderType::getDimension(), LocalSizesSetter >::execHost( localSizes, globalSizes, localBegins, localEnds ); + Algorithms::TemplateStaticFor< std::size_t, 0, SizesHolderType::getDimension(), LocalSizesSetter >::execHost( localSizes, globalSizes, localBegins, localEnds ); localArray.setSize( localSizes ); } diff --git a/src/TNL/Containers/DistributedNDArraySynchronizer.h b/src/TNL/Containers/DistributedNDArraySynchronizer.h index 
e6e41ba33..698530378 100644 --- a/src/TNL/Containers/DistributedNDArraySynchronizer.h +++ b/src/TNL/Containers/DistributedNDArraySynchronizer.h @@ -51,7 +51,7 @@ public: array_view.bind( array.getView() ); // allocate buffers - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), AllocateHelper >::execHost( buffers, array_view ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), AllocateHelper >::execHost( buffers, array_view ); } else { // only bind to the actual data @@ -80,18 +80,18 @@ protected: #endif // fill send buffers - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true ); // issue all send and receive async operations std::vector< typename Communicator::Request > requests; const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup(); - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group ); // wait until send is done Communicator::WaitAll( requests.data(), requests.size() ); // copy data from receive buffers - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false ); } template< std::size_t dim > diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index 0820dd218..dbe9760f6 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -13,7 +13,7 @@ #pragma once #include 
"DistributedVector.h" -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 9bc0045a5..5669a52b6 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -13,7 +13,7 @@ #pragma once #include "DistributedVectorView.h" -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h index d487948a9..98e39ad8c 100644 --- a/src/TNL/Containers/Expressions/Comparison.h +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -14,8 +14,8 @@ #include #include -#include -#include +#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Expressions/DistributedComparison.h b/src/TNL/Containers/Expressions/DistributedComparison.h index 7a7d5c5be..b5e0e96a9 100644 --- a/src/TNL/Containers/Expressions/DistributedComparison.h +++ b/src/TNL/Containers/Expressions/DistributedComparison.h @@ -11,7 +11,6 @@ #pragma once #include -#include #include namespace TNL { diff --git a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h index fe8997aac..355689039 100644 --- a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h @@ -2207,7 +2207,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2226,7 +2226,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = 
lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2252,7 +2252,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2275,7 +2275,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2301,7 +2301,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2324,7 +2324,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 763bdbfd1..a0980baf6 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -2130,7 +2130,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = 
lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2149,7 +2149,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2175,7 +2175,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2198,7 +2198,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2224,7 +2224,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2247,7 +2247,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType 
>::reduce( lhs.getSize(), reduction, fetch, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 29e904bbf..84d362e8a 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -13,7 +13,7 @@ #include #include -#include +#include //// // By vertical operations we mean those applied across vector elements or diff --git a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h index 2acd3c5d4..953339305 100644 --- a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h +++ b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Containers { @@ -48,11 +48,11 @@ void permuteMultimapKeys( Multimap& multimap, const PermutationVector& perm ) Pointers::DevicePointer< Multimap > multimapPointer( multimap ); Pointers::DevicePointer< Multimap > multimapCopyPointer( multimapCopy ); - ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(), - kernel, - &multimapPointer.template getData< DeviceType >(), - &multimapCopyPointer.template modifyData< DeviceType >(), - perm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(), + kernel, + &multimapPointer.template getData< DeviceType >(), + &multimapCopyPointer.template modifyData< DeviceType >(), + perm.getData() ); // copy the permuted data back into the multimap multimap = multimapCopy; @@ -79,10 +79,10 @@ void permuteMultimapValues( Multimap& multimap, const PermutationVector& iperm ) }; Pointers::DevicePointer< Multimap > multimapPointer( multimap ); - ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(), - kernel, - &multimapPointer.template modifyData< DeviceType >(), - iperm.getData() ); + Algorithms::ParallelFor< DeviceType 
>::exec( (IndexType) 0, multimap.getKeysRange(), + kernel, + &multimapPointer.template modifyData< DeviceType >(), + iperm.getData() ); } } // namespace Multimaps diff --git a/src/TNL/Containers/NDArrayView.h b/src/TNL/Containers/NDArrayView.h index 0dc2d9d7e..d5d94d61e 100644 --- a/src/TNL/Containers/NDArrayView.h +++ b/src/TNL/Containers/NDArrayView.h @@ -18,8 +18,8 @@ #include #include #include -#include -#include +#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/StaticArray.hpp b/src/TNL/Containers/StaticArray.hpp index 9567590c0..85171e3d7 100644 --- a/src/TNL/Containers/StaticArray.hpp +++ b/src/TNL/Containers/StaticArray.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - StaticArray_impl.h - description + StaticArray.hpp - description ------------------- begin : Feb 10, 2014 copyright : (C) 2014 by Tomas Oberhuber @@ -13,8 +13,8 @@ #include #include #include -#include -#include +#include +#include namespace TNL { namespace Containers { @@ -102,21 +102,21 @@ template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >::StaticArray( const Value v[ Size ] ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, v ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, v ); } template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >::StaticArray( const Value& v ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignValueFunctor{}, data, v ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignValueFunctor{}, data, v ); } template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value >& v ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, v.getData() ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, v.getData() ); } template< int Size, typename Value 
> @@ -227,7 +227,7 @@ template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const StaticArray< Size, Value >& array ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, array.getData() ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, array.getData() ); return *this; } @@ -236,7 +236,7 @@ template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const T& v ) { - Algorithms::StaticArrayAssignment< StaticArray, T >::assign( *this, v ); + detail::StaticArrayAssignment< StaticArray, T >::assign( *this, v ); return *this; } @@ -263,7 +263,7 @@ StaticArray< Size, Value >:: operator StaticArray< Size, OtherValue >() const { StaticArray< Size, OtherValue > aux; - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, aux.getData(), data ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, aux.getData(), data ); return aux; } @@ -271,7 +271,7 @@ template< int Size, typename Value > __cuda_callable__ void StaticArray< Size, Value >::setValue( const ValueType& val ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignValueFunctor{}, data, val ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignValueFunctor{}, data, val ); } template< int Size, typename Value > diff --git a/src/TNL/Containers/StaticVector.hpp b/src/TNL/Containers/StaticVector.hpp index 99088626d..b512a51a0 100644 --- a/src/TNL/Containers/StaticVector.hpp +++ b/src/TNL/Containers/StaticVector.hpp @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Containers { @@ -22,7 +22,7 @@ template< int Size, typename Real > template< typename, typename > class Operation > StaticVector< Size, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expr ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, 
Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr ); + detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr ); } template< int Size, @@ -32,7 +32,7 @@ template< int Size, __cuda_callable__ StaticVector< Size, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& expr ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, expr ); + detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, expr ); } template< int Size, typename Real > @@ -55,7 +55,7 @@ template< int Size, typename Real > StaticVector< Size, Real >& StaticVector< Size, Real >::operator=( const VectorExpression& expression ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, VectorExpression >::assignStatic( *this, expression ); + detail::VectorAssignment< StaticVector< Size, Real >, VectorExpression >::assignStatic( *this, expression ); return *this; } @@ -64,7 +64,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator+=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::additionStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::additionStatic( *this, expression ); return *this; } @@ -73,7 +73,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator-=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::subtractionStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::subtractionStatic( 
*this, expression ); return *this; } @@ -82,7 +82,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator*=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::multiplicationStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::multiplicationStatic( *this, expression ); return *this; } @@ -91,7 +91,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator/=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::divisionStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::divisionStatic( *this, expression ); return *this; } @@ -102,7 +102,7 @@ StaticVector< Size, Real >:: operator StaticVector< Size, OtherReal >() const { StaticVector< Size, OtherReal > aux; - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, aux.getData(), this->getData() ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, aux.getData(), this->getData() ); return aux; } diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index e01bdf75e..a5c20d596 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -82,8 +82,8 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator=( const VectorExpression& expression ) { - Algorithms::VectorAssignment< Vector, VectorExpression >::resize( *this, expression ); - Algorithms::VectorAssignment< Vector, VectorExpression >::assign( *this, expression ); + detail::VectorAssignment< Vector, VectorExpression >::resize( *this, expression ); + detail::VectorAssignment< Vector, VectorExpression >::assign( *this, expression ); return *this; } @@ -96,7 +96,7 @@ Vector< Real, Device, 
Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator+=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::addition( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::addition( *this, expression ); return *this; } @@ -109,7 +109,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator-=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::subtraction( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::subtraction( *this, expression ); return *this; } @@ -122,7 +122,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator*=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::multiplication( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::multiplication( *this, expression ); return *this; } @@ -135,7 +135,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator/=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::division( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::division( *this, expression ); return *this; } diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index f111a14be..e99754d4b 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -14,7 +14,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 40d870f62..490288e6b 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -11,7 +11,7 @@ #pragma once 
#include -#include +#include #include namespace TNL { @@ -50,7 +50,7 @@ template< typename Real, VectorView< Real, Device, Index >& VectorView< Real, Device, Index >::operator=( const VectorExpression& expression ) { - Algorithms::VectorAssignment< VectorView, VectorExpression >::assign( *this, expression ); + detail::VectorAssignment< VectorView, VectorExpression >::assign( *this, expression ); return *this; } @@ -62,7 +62,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator+=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::addition( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::addition( *this, expression ); return *this; } @@ -74,7 +74,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator-=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::subtraction( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::subtraction( *this, expression ); return *this; } @@ -86,7 +86,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator*=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::multiplication( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::multiplication( *this, expression ); return *this; } @@ -98,7 +98,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator/=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::division( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::division( *this, expression ); return *this; } diff --git a/src/TNL/Containers/Algorithms/ArrayAssignment.h 
b/src/TNL/Containers/detail/ArrayAssignment.h similarity index 85% rename from src/TNL/Containers/Algorithms/ArrayAssignment.h rename to src/TNL/Containers/detail/ArrayAssignment.h index 402ebce5f..e6671bb2c 100644 --- a/src/TNL/Containers/Algorithms/ArrayAssignment.h +++ b/src/TNL/Containers/detail/ArrayAssignment.h @@ -11,12 +11,12 @@ #pragma once #include -#include -#include +#include +#include namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { template< typename Array, typename T, @@ -40,7 +40,7 @@ struct ArrayAssignment< Array, T, true > { TNL_ASSERT_EQ( a.getSize(), t.getSize(), "The sizes of the arrays must be equal." ); if( t.getSize() > 0 ) // we allow even assignment of empty arrays - MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template + Algorithms::MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template copy< typename Array::ValueType, typename T::ValueType, typename Array::IndexType > ( a.getArrayData(), t.getArrayData(), t.getSize() ); } @@ -61,12 +61,12 @@ struct ArrayAssignment< Array, T, false > static void assign( Array& a, const T& t ) { TNL_ASSERT_FALSE( a.empty(), "Cannot assign value to empty array." 
); - MemoryOperations< typename Array::DeviceType >::template + Algorithms::MemoryOperations< typename Array::DeviceType >::template set< typename Array::ValueType, typename Array::IndexType > ( a.getArrayData(), ( typename Array::ValueType ) t, a.getSize() ); } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayIO.h b/src/TNL/Containers/detail/ArrayIO.h similarity index 98% rename from src/TNL/Containers/Algorithms/ArrayIO.h rename to src/TNL/Containers/detail/ArrayIO.h index 5ec8b000b..58817a83c 100644 --- a/src/TNL/Containers/Algorithms/ArrayIO.h +++ b/src/TNL/Containers/detail/ArrayIO.h @@ -17,7 +17,7 @@ namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { template< typename Value, typename Device, @@ -118,6 +118,6 @@ struct ArrayIO< Value, Device, Index, false > } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/StaticArrayAssignment.h b/src/TNL/Containers/detail/StaticArrayAssignment.h similarity index 63% rename from src/TNL/Containers/Algorithms/StaticArrayAssignment.h rename to src/TNL/Containers/detail/StaticArrayAssignment.h index 32a59e98c..9a8d7d3ee 100644 --- a/src/TNL/Containers/Algorithms/StaticArrayAssignment.h +++ b/src/TNL/Containers/detail/StaticArrayAssignment.h @@ -11,33 +11,31 @@ #pragma once #include -#include +#include namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { - namespace detail { - struct AssignArrayFunctor - { - template< typename LeftValue, typename RightValue > - __cuda_callable__ - void operator()( int i, LeftValue& data, const RightValue& v ) const - { - data[ i ] = v[ i ]; - } - }; +struct AssignArrayFunctor +{ + template< typename LeftValue, typename RightValue > + __cuda_callable__ + void operator()( int i, LeftValue& data, const RightValue& v ) const + { + data[ i ] = v[ i ]; + 
} +}; - struct AssignValueFunctor - { - template< typename LeftValue, typename RightValue > - __cuda_callable__ - void operator()( int i, LeftValue& data, const RightValue& v ) const - { - data[ i ] = v; - } - }; - } // namespace detail +struct AssignValueFunctor +{ + template< typename LeftValue, typename RightValue > + __cuda_callable__ + void operator()( int i, LeftValue& data, const RightValue& v ) const + { + data[ i ] = v; + } +}; template< typename StaticArray, typename T, @@ -55,7 +53,7 @@ struct StaticArrayAssignment< StaticArray, T, true > static void assign( StaticArray& a, const T& v ) { static_assert( StaticArray::getSize() == T::getSize(), "Cannot assign static arrays with different size." ); - StaticFor< 0, StaticArray::getSize() >::exec( detail::AssignArrayFunctor{}, a.getData(), v ); + Algorithms::StaticFor< 0, StaticArray::getSize() >::exec( AssignArrayFunctor{}, a.getData(), v ); } }; @@ -70,10 +68,10 @@ struct StaticArrayAssignment< StaticArray, T, false > __cuda_callable__ static void assign( StaticArray& a, const T& v ) { - StaticFor< 0, StaticArray::getSize() >::exec( detail::AssignValueFunctor{}, a, v ); + Algorithms::StaticFor< 0, StaticArray::getSize() >::exec( AssignValueFunctor{}, a, v ); } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h similarity index 91% rename from src/TNL/Containers/Algorithms/VectorAssignment.h rename to src/TNL/Containers/detail/VectorAssignment.h index c861579f4..fa778a248 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/detail/VectorAssignment.h @@ -11,11 +11,11 @@ #pragma once #include -#include +#include namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { /** * \brief Vector assignment @@ -68,7 +68,7 @@ struct VectorAssignment< Vector, T, true > { data[ i ] = t[ i ]; }; - ParallelFor< 
DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); } }; @@ -103,7 +103,7 @@ struct VectorAssignment< Vector, T, false > { data[ i ] = t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); } }; @@ -169,7 +169,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] += t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); } __cuda_callable__ @@ -194,7 +194,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] -= t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); } __cuda_callable__ @@ -219,7 +219,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] *= t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); } __cuda_callable__ @@ -244,7 +244,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] /= t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); } }; @@ -275,7 +275,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] += t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); } __cuda_callable__ @@ -297,7 +297,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] -= t; }; - ParallelFor< DeviceType >::exec( ( 
IndexType ) 0, v.getSize(), subtract ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); } __cuda_callable__ @@ -319,7 +319,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] *= t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); } __cuda_callable__ @@ -341,10 +341,10 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] /= t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/ndarray/BoundaryExecutors.h b/src/TNL/Containers/ndarray/BoundaryExecutors.h index e4cd93705..cf06ab151 100644 --- a/src/TNL/Containers/ndarray/BoundaryExecutors.h +++ b/src/TNL/Containers/ndarray/BoundaryExecutors.h @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include @@ -225,12 +225,12 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 3 > > const auto end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); const auto end2 = ends.template getSize< get< 2 >( Permutation{} ) >(); - ParallelFor3D< Device >::exec( begin2, begin1, begin0, skipBegin2, end1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipEnd2, begin1, begin0, end2, end1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, begin1, begin0, skipEnd2, skipBegin1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, skipEnd1, begin0, skipEnd2, end1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, begin0, skipEnd2, skipEnd1, skipBegin0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, skipEnd0, skipEnd2, skipEnd1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device 
>::exec( begin2, begin1, begin0, skipBegin2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipEnd2, begin1, begin0, end2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, begin1, begin0, skipEnd2, skipBegin1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipEnd1, begin0, skipEnd2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, begin0, skipEnd2, skipEnd1, skipBegin0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, skipEnd0, skipEnd2, skipEnd1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -291,10 +291,10 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 2 > > const auto end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const auto end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); - ParallelFor2D< Device >::exec( begin1, begin0, skipBegin1, end0, kernel, f ); - ParallelFor2D< Device >::exec( skipEnd1, begin0, end1, end0, kernel, f ); - ParallelFor2D< Device >::exec( skipBegin1, begin0, skipEnd1, skipBegin0, kernel, f ); - ParallelFor2D< Device >::exec( skipBegin1, skipEnd0, skipEnd1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( begin1, begin0, skipBegin1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( skipEnd1, begin0, end1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( skipBegin1, begin0, skipEnd1, skipBegin0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( skipBegin1, skipEnd0, skipEnd1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -343,8 +343,8 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 1 > > const auto skipEnd = skipEnds.template getSize< get< 0 >( Permutation{} ) >(); const auto end = ends.template getSize< get< 0 >( Permutation{} ) >(); - ParallelFor< Device >::exec( begin, skipBegin, f ); - ParallelFor< Device >::exec( 
skipEnd, end, f ); + Algorithms::ParallelFor< Device >::exec( begin, skipBegin, f ); + Algorithms::ParallelFor< Device >::exec( skipEnd, end, f ); } }; diff --git a/src/TNL/Containers/ndarray/Executors.h b/src/TNL/Containers/ndarray/Executors.h index eff2adff3..2d3db794d 100644 --- a/src/TNL/Containers/ndarray/Executors.h +++ b/src/TNL/Containers/ndarray/Executors.h @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include @@ -139,7 +139,7 @@ struct ParallelExecutorDeviceDispatch const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); const Index end2 = ends.template getSize< get< 2 >( Permutation{} ) >(); - ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); + Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); } }; @@ -168,7 +168,7 @@ struct ParallelExecutorDeviceDispatch< Permutation, Devices::Cuda > const Index end0 = ends.template getSize< get< Ends::getDimension() - 3 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< Ends::getDimension() - 2 >( Permutation{} ) >(); const Index end2 = ends.template getSize< get< Ends::getDimension() - 1 >( Permutation{} ) >(); - ParallelFor3D< Devices::Cuda >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); } }; @@ -214,7 +214,7 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 3 > > const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); const Index end2 = ends.template getSize< get< 2 >( Permutation{} ) >(); - ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel, f ); } template< typename __Device, 
typename = void > @@ -265,7 +265,7 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 2 > > const Index begin1 = begins.template getSize< get< 1 >( Permutation{} ) >(); const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); - ParallelFor2D< Device >::exec( begin1, begin0, end1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( begin1, begin0, end1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -312,8 +312,8 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 1 > > const Index begin = begins.template getSize< get< 0 >( Permutation{} ) >(); const Index end = ends.template getSize< get< 0 >( Permutation{} ) >(); -// ParallelFor< Device >::exec( begin, end, kernel ); - ParallelFor< Device >::exec( begin, end, f ); +// Algorithms::ParallelFor< Device >::exec( begin, end, kernel ); + Algorithms::ParallelFor< Device >::exec( begin, end, f ); } }; diff --git a/src/TNL/Containers/ndarray/SizesHolder.h b/src/TNL/Containers/ndarray/SizesHolder.h index 72d61bf81..1375683b2 100644 --- a/src/TNL/Containers/ndarray/SizesHolder.h +++ b/src/TNL/Containers/ndarray/SizesHolder.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include @@ -231,7 +231,7 @@ SizesHolder< Index, sizes... > operator+( const SizesHolder< Index, sizes... >& lhs, const OtherHolder& rhs ) { SizesHolder< Index, sizes... > result; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorPlusHelper >::execHost( result, lhs, rhs ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorPlusHelper >::execHost( result, lhs, rhs ); return result; } @@ -242,7 +242,7 @@ SizesHolder< Index, sizes... > operator-( const SizesHolder< Index, sizes... >& lhs, const OtherHolder& rhs ) { SizesHolder< Index, sizes... 
> result; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorMinusHelper >::execHost( result, lhs, rhs ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorMinusHelper >::execHost( result, lhs, rhs ); return result; } @@ -295,9 +295,9 @@ template< typename Index, std::ostream& operator<<( std::ostream& str, const SizesHolder< Index, sizes... >& holder ) { str << "SizesHolder< "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, holder ); str << holder.template getStaticSize< sizeof...(sizes) - 1 >() << " >( "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); str << holder.template getSize< sizeof...(sizes) - 1 >() << " )"; return str; } @@ -360,10 +360,10 @@ template< typename Index, std::ostream& operator<<( std::ostream& str, const __ndarray_impl::LocalBeginsHolder< SizesHolder< Index, sizes... >, ConstValue >& holder ) { str << "LocalBeginsHolder< SizesHolder< "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, (SizesHolder< Index, sizes... >) holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, (SizesHolder< Index, sizes... 
>) holder ); str << holder.template getStaticSize< sizeof...(sizes) - 1 >() << " >, "; str << ConstValue << " >( "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); str << holder.template getSize< sizeof...(sizes) - 1 >() << " )"; return str; } diff --git a/src/TNL/Containers/ndarray/SizesHolderHelpers.h b/src/TNL/Containers/ndarray/SizesHolderHelpers.h index 9d1c0d439..d06c9a7a0 100644 --- a/src/TNL/Containers/ndarray/SizesHolderHelpers.h +++ b/src/TNL/Containers/ndarray/SizesHolderHelpers.h @@ -15,7 +15,7 @@ #include #include -#include +#include #include namespace TNL { @@ -227,7 +227,7 @@ bool sizesWeakCompare( const SizesHolder1& sizes1, const SizesHolder2& sizes2 ) static_assert( SizesHolder1::getDimension() == SizesHolder2::getDimension(), "Cannot compare sizes of different dimensions." 
); bool result = true; - TemplateStaticFor< std::size_t, 0, SizesHolder1::getDimension(), WeakCompareHelper >::exec( sizes1, sizes2, result ); + Algorithms::TemplateStaticFor< std::size_t, 0, SizesHolder1::getDimension(), WeakCompareHelper >::exec( sizes1, sizes2, result ); return result; } diff --git a/src/TNL/Functions/CutMeshFunction.h b/src/TNL/Functions/CutMeshFunction.h index 4cad00d1b..e727b15fc 100644 --- a/src/TNL/Functions/CutMeshFunction.h +++ b/src/TNL/Functions/CutMeshFunction.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include #include namespace TNL { @@ -101,7 +101,7 @@ class CutMeshFunction typename OutMesh::CoordinatesType starts; starts.setValue(0); - StaticVectorFor::exec(starts,outMesh.getDimensions(),kernel); + Algorithms::StaticVectorFor::exec(starts,outMesh.getDimensions(),kernel); } return inCut; diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 6af808995..53f61903e 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -94,7 +94,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); - this->groupPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->groupPointers.template prefixSum< Algorithms::ScanType::Exclusive >(); // uncomment to perform structure test //DeviceDependentCode::verifyRowPerm( *this, rowLengths ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 86b10119c..9d8fd6456 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -88,7 +88,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng rowPtrs.bind( this->rowPointers.getData(), this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); - this->rowPointers.template prefixSum< 
Containers::Algorithms::ScanType::Exclusive >(); + this->rowPointers.template prefixSum< Algorithms::ScanType::Exclusive >(); this->maxRowLength = max( rowLengths ); /**** diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index 0886d686c..b2abd13c5 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -27,7 +27,7 @@ // operations #include // std::add_const #include -#include +#include #include namespace TNL { @@ -105,13 +105,13 @@ public: local_span[1].fetch_min( i ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), - kernel, - &localMatrixPointer.template getData< DeviceType >(), - span_starts.getData(), - span_ends.getData(), - local_span.getData() - ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), + kernel, + &localMatrixPointer.template getData< DeviceType >(), + span_starts.getData(), + span_ends.getData(), + local_span.getData() + ); // set the local-only span (optimization for banded matrices) localOnlySpan.first = local_span.getElement( 0 ); @@ -192,8 +192,8 @@ public: { outVectorView[ i ] = localMatrix->rowVectorProduct( i, globalBufferView ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), kernel, - &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), kernel, + &localMatrixPointer.template getData< DeviceType >() ); } // optimization for banded matrices else { @@ -206,8 +206,8 @@ public: { outVectorView[ i ] = localMatrix->rowVectorProduct( i, inView ); }; - ParallelFor< DeviceType >::exec( localOnlySpan.first, localOnlySpan.second, kernel1, - &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( localOnlySpan.first, localOnlySpan.second, kernel1, + &localMatrixPointer.template getData< DeviceType >() ); // wait for all communications to finish 
CommunicatorType::WaitAll( &commRequests[0], commRequests.size() ); @@ -217,10 +217,10 @@ public: { outVectorView[ i ] = localMatrix->rowVectorProduct( i, globalBufferView ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localOnlySpan.first, kernel2, - &localMatrixPointer.template getData< DeviceType >() ); - ParallelFor< DeviceType >::exec( localOnlySpan.second, localMatrix.getRows(), kernel2, - &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localOnlySpan.first, kernel2, + &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( localOnlySpan.second, localMatrix.getRows(), kernel2, + &localMatrixPointer.template getData< DeviceType >() ); } } diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h index 9cc7b477b..354b0a9e1 100644 --- a/src/TNL/Matrices/MatrixOperations.h +++ b/src/TNL/Matrices/MatrixOperations.h @@ -343,7 +343,7 @@ public: // TODO: use static storage, e.g. 
from the CudaReductionBuffer, to avoid frequent reallocations Containers::Vector< RealType, Devices::Cuda, IndexType > xDevice; xDevice.setSize( n ); - Containers::Algorithms::MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); + Algorithms::MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index f53efdc0c..00df43cd4 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -58,7 +58,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->slicePointers.template prefixSum< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 9f5875f17..d1caef3ec 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -79,7 +79,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->slicePointers.template prefixSum< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h index ce7caaf32..8771d24dc 100644 --- 
a/src/TNL/Matrices/SparseOperations_impl.h +++ b/src/TNL/Matrices/SparseOperations_impl.h @@ -17,7 +17,7 @@ #include #include -#include +#include namespace TNL { namespace Matrices { @@ -353,11 +353,11 @@ reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm ) dest[ i ] = src[ perm[ i ] ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), - kernel, - src.getData(), - dest.getData(), - perm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), + kernel, + src.getData(), + dest.getData(), + perm.getData() ); } } // namespace Matrices diff --git a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h index d0461dded..6030b976f 100644 --- a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h +++ b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include @@ -67,7 +67,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 1, RealType, Device, meshFunctionData[ entity.getIndex() ] = buffer[ j ]; } }; - ParallelFor< Device >::exec( 0, sizex, kernel ); + Algorithms::ParallelFor< Device >::exec( 0, sizex, kernel ); }; }; @@ -115,7 +115,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 2, RealType, Device, meshFunctionData[ entity.getIndex() ] = buffer[ j * sizex + i ]; } }; - ParallelFor2D< Device >::exec( 0, 0, sizex, sizey, kernel ); + Algorithms::ParallelFor2D< Device >::exec( 0, 0, sizex, sizey, kernel ); }; }; @@ -164,7 +164,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 3, RealType, Device, meshFunctionData[ entity.getIndex() ] = buffer[ k * sizex * sizey + j * sizex + i ]; } }; - ParallelFor3D< Device >::exec( 0, 0, 0, sizex, sizey, sizez, kernel ); + Algorithms::ParallelFor3D< Device >::exec( 0, 0, 0, sizex, sizey, sizez, kernel ); }; }; diff --git a/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h 
b/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h index 20efa0259..ec30a4f47 100644 --- a/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h +++ b/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h @@ -10,7 +10,7 @@ #pragma once -#include +#include namespace TNL { namespace Meshes { @@ -45,7 +45,7 @@ class CopyEntitiesHelper fromEntity.refresh(); toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()]; }; - ParallelFor< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0, (Index)size.x(), kernel ); + Algorithms::ParallelFor< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0, (Index)size.x(), kernel ); } @@ -79,7 +79,7 @@ class CopyEntitiesHelper fromEntity.refresh(); toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()]; }; - ParallelFor2D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)size.x(), (Index)size.y(), kernel ); + Algorithms::ParallelFor2D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)size.x(), (Index)size.y(), kernel ); } }; @@ -113,7 +113,7 @@ class CopyEntitiesHelper fromEntity.refresh(); toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()]; }; - ParallelFor3D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)0,(Index)size.x(),(Index)size.y(), (Index)size.z(), kernel ); + Algorithms::ParallelFor3D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)0,(Index)size.x(),(Index)size.y(), (Index)size.z(), kernel ); } }; diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h index b8983feed..840a201c6 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace TNL { namespace Meshes { @@ -176,7 +176,7 @@ class 
NeighborGridEntityGetter< void refresh( const GridType& grid, const IndexType& entityIndex ) { #ifndef HAVE_CUDA // TODO: fix it -- does not work with nvcc - TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilRefresher >::exec( *this, entityIndex ); #endif }; diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h index 5b0e48767..d6f4ab24e 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h @@ -199,9 +199,9 @@ class NeighborGridEntityGetter< void refresh( const GridType& grid, const IndexType& entityIndex ) { #ifndef HAVE_CUDA // TODO: fix this to work with CUDA - TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); #endif }; diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h index 5fe5329bb..3cf2bb8d1 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace TNL { namespace Meshes { @@ -223,11 +223,11 @@ class 
NeighborGridEntityGetter< void refresh( const GridType& grid, const IndexType& entityIndex ) { #ifndef HAVE_CUDA // TODO: fix this to work with CUDA - TemplateStaticFor< IndexType, -stencilSize, 0, StencilZRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilZRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilZRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilZRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); #endif }; diff --git a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h index 7630a2d64..c956d3169 100644 --- a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h +++ b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h @@ -28,8 +28,8 @@ #include #include #include -#include -#include +#include +#include namespace TNL { namespace Meshes { @@ -69,10 +69,10 @@ public: subentity.template bindSuperentitiesStorageNetwork< SuperdimensionTag::value >( superentitiesStorage->getValues( i ) ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel, - &meshPointer.template modifyData< DeviceType >(), - &superentitiesStoragePointer.template modifyData< DeviceType >() ); + Algorithms::ParallelFor< 
DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel, + &meshPointer.template modifyData< DeviceType >(), + &superentitiesStoragePointer.template modifyData< DeviceType >() ); } }; @@ -109,10 +109,10 @@ public: superentity.template bindSubentitiesStorageNetwork< DimensionTag::value >( subentitiesStorage->getValues( i ) ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel, - &meshPointer.template modifyData< DeviceType >(), - &subentitiesStoragePointer.template modifyData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel, + &meshPointer.template modifyData< DeviceType >(), + &subentitiesStoragePointer.template modifyData< DeviceType >() ); } }; @@ -144,14 +144,14 @@ public: static void exec( Mesh& mesh ) { - TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, Inner >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, Inner >::execHost( mesh ); } }; public: static void exec( Mesh& mesh ) { - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, OuterLoop >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, OuterLoop >::execHost( mesh ); } }; diff --git a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h index c98f145d0..bd9c02411 100644 --- a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h +++ b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h @@ -156,27 +156,27 @@ public: }; Pointers::DevicePointer< Mesh > meshPointer( mesh ); - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel1, - &meshPointer.template getData< DeviceType >(), - entities.getData(), - perm.getData() ); - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel2, - &meshPointer.template modifyData< DeviceType >(), - entities.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, 
entitiesCount, + kernel1, + &meshPointer.template getData< DeviceType >(), + entities.getData(), + perm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel2, + &meshPointer.template modifyData< DeviceType >(), + entities.getData() ); // permute superentities storage - TemplateStaticFor< int, 0, Dimension, SubentitiesStorageWorker >::execHost( mesh, perm ); + Algorithms::TemplateStaticFor< int, 0, Dimension, SubentitiesStorageWorker >::execHost( mesh, perm ); // permute subentities storage - TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesStorageWorker >::execHost( mesh, perm ); + Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesStorageWorker >::execHost( mesh, perm ); // update superentity indices from the subentities - TemplateStaticFor< int, 0, Dimension, SubentitiesWorker >::execHost( mesh, iperm ); + Algorithms::TemplateStaticFor< int, 0, Dimension, SubentitiesWorker >::execHost( mesh, iperm ); // update subentity indices from the superentities - TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesWorker >::execHost( mesh, iperm ); + Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesWorker >::execHost( mesh, iperm ); } }; diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h index 21ccd0ccd..f29fec33e 100644 --- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h +++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h @@ -10,8 +10,8 @@ #pragma once -#include -#include +#include +#include #include #include #include @@ -121,8 +121,8 @@ public: public: static void exec( Mesh& mesh ) { - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, SetEntitiesCount >::execHost( mesh ); - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, 
ResetBoundaryTags >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, SetEntitiesCount >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, ResetBoundaryTags >::execHost( mesh ); auto kernel = [] __cuda_callable__ ( GlobalIndexType faceIndex, @@ -136,17 +136,17 @@ public: const GlobalIndexType cellIndex = face.template getSuperentityIndex< Mesh::getMeshDimension() >( 0 ); mesh->template setIsBoundaryEntity< Mesh::getMeshDimension() >( cellIndex, true ); // initialize all subentities - TemplateStaticFor< int, 0, Mesh::getMeshDimension() - 1, InitializeSubentities >::exec( *mesh, faceIndex, face ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() - 1, InitializeSubentities >::exec( *mesh, faceIndex, face ); } }; const GlobalIndexType facesCount = mesh.template getEntitiesCount< Mesh::getMeshDimension() - 1 >(); Pointers::DevicePointer< Mesh > meshPointer( mesh ); - ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount, - kernel, - &meshPointer.template modifyData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount, + kernel, + &meshPointer.template modifyData< DeviceType >() ); - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, UpdateBoundaryIndices >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, UpdateBoundaryIndices >::execHost( mesh ); } }; diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h index 031be1900..ada83b5fb 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h +++ b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h @@ -16,8 +16,6 @@ #pragma once -#include - #include #include #include diff --git a/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h b/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h index 
8ad08bd63..30cbb31e6 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h +++ b/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h @@ -16,7 +16,7 @@ #pragma once -#include +#include #include namespace TNL { @@ -47,7 +47,7 @@ public: static SubentitySeedArray create( const SubvertexAccessorType& subvertices ) { SubentitySeedArray subentitySeeds; - TemplateStaticFor< LocalIndexType, 0, SUBENTITIES_COUNT, CreateSubentitySeeds >::execHost( subentitySeeds, subvertices ); + Algorithms::TemplateStaticFor< LocalIndexType, 0, SUBENTITIES_COUNT, CreateSubentitySeeds >::execHost( subentitySeeds, subvertices ); return subentitySeeds; } @@ -61,7 +61,7 @@ private: public: static void exec( SubentitySeedArray& subentitySeeds, const SubvertexAccessorType& subvertices ) { - TemplateStaticFor< LocalIndexType, 0, SUBENTITY_VERTICES_COUNT, SetSubentitySeedVertex >::execHost( subentitySeeds[ subentityIndex ], subvertices ); + Algorithms::TemplateStaticFor< LocalIndexType, 0, SUBENTITY_VERTICES_COUNT, SetSubentitySeedVertex >::execHost( subentitySeeds[ subentityIndex ], subvertices ); } private: diff --git a/src/TNL/Meshes/Writers/VTKWriter_impl.h b/src/TNL/Meshes/Writers/VTKWriter_impl.h index e6c3eca44..83cf95ec4 100644 --- a/src/TNL/Meshes/Writers/VTKWriter_impl.h +++ b/src/TNL/Meshes/Writers/VTKWriter_impl.h @@ -407,10 +407,10 @@ VTKWriter< Mesh >::writeAllEntities( const Mesh& mesh, std::ostream& str ) const Index cellsListSize = __impl::getCellsListSize( mesh ); str << std::endl << "CELLS " << allEntitiesCount << " " << cellsListSize << std::endl; - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntitiesWriter >::exec( mesh, str ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntitiesWriter >::exec( mesh, str ); str << std::endl << "CELL_TYPES " << allEntitiesCount << std::endl; - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntityTypesWriter >::exec( mesh, str ); + Algorithms::TemplateStaticFor< 
int, 0, Mesh::getMeshDimension() + 1, EntityTypesWriter >::exec( mesh, str ); } template< typename Mesh > diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index 5b0915f97..d6cb8fdd0 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include "GMRES.h" @@ -380,7 +380,7 @@ hauseholder_generate( const int i, else y_i[ j ] = z[ j ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, size, kernel_truncation ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, size, kernel_truncation ); } else { ConstDeviceView z_local = Traits::getConstLocalView( z ); @@ -420,7 +420,7 @@ hauseholder_generate( const int i, const RealType* _y_i = Traits::getConstLocalView( y_i ).getData(); const IndexType ldSize = this->ldSize; auto fetch = [_Y, _y_i, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _Y[ idx + k * ldSize ] * _y_i[ idx ]; }; - Containers::Algorithms::Multireduction< DeviceType >::reduce + Algorithms::Multireduction< DeviceType >::reduce ( (RealType) 0, fetch, std::plus<>{}, @@ -451,7 +451,7 @@ hauseholder_apply_trunc( HostView out, // The upper (m+1)x(m+1) submatrix of Y is duplicated in the YL buffer, // which resides on host and is broadcasted from rank 0 to all processes. 
HostView YL_i( &YL[ i * (restarting_max + 1) ], restarting_max + 1 ); - Containers::Algorithms::MultiDeviceMemoryOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() ); + Algorithms::MultiDeviceMemoryOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() ); // no-op if the problem is not distributed CommunicatorType::Bcast( YL_i.getData(), YL_i.getSize(), 0, Traits::getCommunicationGroup( *this->matrix ) ); @@ -466,7 +466,7 @@ hauseholder_apply_trunc( HostView out, } if( std::is_same< DeviceType, Devices::Cuda >::value ) { RealType host_z[ i + 1 ]; - Containers::Algorithms::MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 ); + Algorithms::MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 ); for( int k = 0; k <= i; k++ ) out[ k ] = host_z[ k ] - YL_i[ k ] * aux; } @@ -520,7 +520,7 @@ hauseholder_cwy_transposed( VectorViewType z, const RealType* _w = Traits::getConstLocalView( w ).getData(); const IndexType ldSize = this->ldSize; auto fetch = [_Y, _w, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _Y[ idx + k * ldSize ] * _w[ idx ]; }; - Containers::Algorithms::Multireduction< DeviceType >::reduce + Algorithms::Multireduction< DeviceType >::reduce ( (RealType) 0, fetch, std::plus<>{}, diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h index de4b9f3f1..c9751fe4f 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h @@ -14,7 +14,7 @@ #include "Diagonal.h" -#include +#include namespace TNL { namespace Solvers { @@ -39,7 +39,7 @@ update( const MatrixPointer& matrixPointer ) diag_view[ i ] = kernel_matrix->getElementFast( i, i ); }; - 
ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } template< typename Matrix > @@ -54,7 +54,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const x[ i ] = b[ i ] / diag_view[ i ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } @@ -77,7 +77,7 @@ update( const MatrixPointer& matrixPointer ) diag_view[ i ] = kernel_matrix->getLocalMatrix().getElementFast( i, gi ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } template< typename Matrix, typename Communicator > @@ -94,7 +94,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const x_view[ i ] = b_view[ i ] / diag_view[ i ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } } // namespace Preconditioners diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h index be9e37f23..5ae255304 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h @@ -16,7 +16,7 @@ #include "TriangularSolve.h" #include -#include +#include namespace TNL { namespace Solvers { @@ -308,7 +308,7 @@ allocate_LU() L_rowLengths_view[ i ] = L_entries; U_rowLengths_view[ i ] = U_entries; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths ); L->setCompressedRowLengths( L_rowLengths ); U->setCompressedRowLengths( U_rowLengths ); #else @@ -349,7 +349,7 @@ copy_triangular_factors() break; } }; - ParallelFor< 
DeviceType >::exec( (IndexType) 0, N, kernel_copy_values ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_values ); #else throw std::runtime_error("The program was not compiled with the CUSPARSE library. Pass -DHAVE_CUSPARSE -lcusparse to the compiler."); #endif diff --git a/src/UnitTests/Algorithms/CMakeLists.txt b/src/UnitTests/Algorithms/CMakeLists.txt new file mode 100644 index 000000000..6870bc84e --- /dev/null +++ b/src/UnitTests/Algorithms/CMakeLists.txt @@ -0,0 +1,29 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp ) + TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp ) + TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp ) + TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) +ENDIF( BUILD_CUDA ) + + +ADD_TEST( MemoryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/MemoryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} ) 
diff --git a/src/UnitTests/Containers/MemoryOperationsTest.cpp b/src/UnitTests/Algorithms/MemoryOperationsTest.cpp similarity index 100% rename from src/UnitTests/Containers/MemoryOperationsTest.cpp rename to src/UnitTests/Algorithms/MemoryOperationsTest.cpp diff --git a/src/UnitTests/Containers/MemoryOperationsTest.cu b/src/UnitTests/Algorithms/MemoryOperationsTest.cu similarity index 100% rename from src/UnitTests/Containers/MemoryOperationsTest.cu rename to src/UnitTests/Algorithms/MemoryOperationsTest.cu diff --git a/src/UnitTests/Containers/MemoryOperationsTest.h b/src/UnitTests/Algorithms/MemoryOperationsTest.h similarity index 98% rename from src/UnitTests/Containers/MemoryOperationsTest.h rename to src/UnitTests/Algorithms/MemoryOperationsTest.h index 6049e09bc..ebfb01f1b 100644 --- a/src/UnitTests/Containers/MemoryOperationsTest.h +++ b/src/UnitTests/Algorithms/MemoryOperationsTest.h @@ -13,14 +13,13 @@ #ifdef HAVE_GTEST #include #include -#include -#include +#include +#include #include "gtest/gtest.h" using namespace TNL; -using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; constexpr int ARRAY_TEST_SIZE = 5000; diff --git a/src/UnitTests/Containers/MultireductionTest.cpp b/src/UnitTests/Algorithms/MultireductionTest.cpp similarity index 100% rename from src/UnitTests/Containers/MultireductionTest.cpp rename to src/UnitTests/Algorithms/MultireductionTest.cpp diff --git a/src/UnitTests/Containers/MultireductionTest.cu b/src/UnitTests/Algorithms/MultireductionTest.cu similarity index 100% rename from src/UnitTests/Containers/MultireductionTest.cu rename to src/UnitTests/Algorithms/MultireductionTest.cu diff --git a/src/UnitTests/Containers/MultireductionTest.h b/src/UnitTests/Algorithms/MultireductionTest.h similarity index 97% rename from src/UnitTests/Containers/MultireductionTest.h rename to src/UnitTests/Algorithms/MultireductionTest.h index 7a321f583..5e11efd16 100644 --- 
a/src/UnitTests/Containers/MultireductionTest.h +++ b/src/UnitTests/Algorithms/MultireductionTest.h @@ -15,11 +15,11 @@ #include #include -#include +#include using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename View > void setLinearSequence( View& deviceVector ) diff --git a/src/UnitTests/ParallelForTest.cpp b/src/UnitTests/Algorithms/ParallelForTest.cpp similarity index 100% rename from src/UnitTests/ParallelForTest.cpp rename to src/UnitTests/Algorithms/ParallelForTest.cpp diff --git a/src/UnitTests/ParallelForTest.cu b/src/UnitTests/Algorithms/ParallelForTest.cu similarity index 100% rename from src/UnitTests/ParallelForTest.cu rename to src/UnitTests/Algorithms/ParallelForTest.cu diff --git a/src/UnitTests/ParallelForTest.h b/src/UnitTests/Algorithms/ParallelForTest.h similarity index 86% rename from src/UnitTests/ParallelForTest.h rename to src/UnitTests/Algorithms/ParallelForTest.h index 95455286e..aa75fd560 100644 --- a/src/UnitTests/ParallelForTest.h +++ b/src/UnitTests/Algorithms/ParallelForTest.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include #ifdef HAVE_GTEST #include @@ -38,7 +38,7 @@ TEST( ParallelForTest, 1D_host ) { view[i] = i; }; - ParallelFor< Devices::Host >::exec( 0, size, kernel ); + Algorithms::ParallelFor< Devices::Host >::exec( 0, size, kernel ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -65,7 +65,7 @@ TEST( ParallelForTest, 2D_host ) { view[i] = i; }; - ParallelFor2D< Devices::Host >::exec( 0, 0, size, 1, kernel1 ); + Algorithms::ParallelFor2D< Devices::Host >::exec( 0, 0, size, 1, kernel1 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -77,7 +77,7 @@ TEST( ParallelForTest, 2D_host ) { view[j] = j; }; - ParallelFor2D< Devices::Host >::exec( 0, 0, 1, size, kernel2 ); + Algorithms::ParallelFor2D< Devices::Host >::exec( 0, 0, 1, size, kernel2 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ 
-104,7 +104,7 @@ TEST( ParallelForTest, 3D_host ) { view[i] = i; }; - ParallelFor3D< Devices::Host >::exec( 0, 0, 0, size, 1, 1, kernel1 ); + Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, size, 1, 1, kernel1 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -116,7 +116,7 @@ TEST( ParallelForTest, 3D_host ) { view[j] = j; }; - ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, size, 1, kernel2 ); + Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, size, 1, kernel2 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -128,7 +128,7 @@ TEST( ParallelForTest, 3D_host ) { view[k] = k; }; - ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, 1, size, kernel3 ); + Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, 1, size, kernel3 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -158,7 +158,7 @@ void test_1D_cuda() { view[i] = i; }; - ParallelFor< Devices::Cuda >::exec( 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( 0, size, kernel ); ArrayHost ah; ah = a; @@ -194,7 +194,7 @@ void test_2D_cuda() { view[i] = i; }; - ParallelFor2D< Devices::Cuda >::exec( 0, 0, size, 1, kernel1 ); + Algorithms::ParallelFor2D< Devices::Cuda >::exec( 0, 0, size, 1, kernel1 ); ArrayHost ah; ah = a; @@ -208,7 +208,7 @@ void test_2D_cuda() { view[j] = j; }; - ParallelFor2D< Devices::Cuda >::exec( 0, 0, 1, size, kernel2 ); + Algorithms::ParallelFor2D< Devices::Cuda >::exec( 0, 0, 1, size, kernel2 ); ah = a; if( ah != expected ) { @@ -243,7 +243,7 @@ void test_3D_cuda() { view[i] = i; }; - ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, size, 1, 1, kernel1 ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, size, 1, 1, kernel1 ); ArrayHost ah; ah = a; @@ -257,7 +257,7 @@ void test_3D_cuda() { view[j] = j; }; - ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, size, 1, kernel2 ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, size, 1, kernel2 ); ah = a; if( ah != expected ) { @@ -270,7 +270,7 
@@ void test_3D_cuda() { view[k] = k; }; - ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, 1, size, kernel3 ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, 1, size, kernel3 ); ah = a; if( ah != expected ) { @@ -287,4 +287,4 @@ TEST( ParallelForTest, 3D_cuda ) #endif #endif -#include "main.h" +#include "../main.h" diff --git a/src/UnitTests/AllocatorsTest.h b/src/UnitTests/AllocatorsTest.h index 30d904b7a..16438e082 100644 --- a/src/UnitTests/AllocatorsTest.h +++ b/src/UnitTests/AllocatorsTest.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "gtest/gtest.h" @@ -83,7 +83,7 @@ TYPED_TEST( AllocatorsTest, CudaManaged ) ASSERT_NE( data, nullptr ); // set data on the device - Containers::Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); + Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); // check values on the host @@ -103,7 +103,7 @@ TYPED_TEST( AllocatorsTest, Cuda ) ASSERT_NE( data, nullptr ); // set data on the device - Containers::Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); + Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); allocator.deallocate( data, ARRAY_TEST_SIZE ); diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index b6f7f383f..6bfae47e1 100644 --- a/src/UnitTests/CMakeLists.txt +++ b/src/UnitTests/CMakeLists.txt @@ -44,15 +44,6 @@ ADD_EXECUTABLE( ObjectTest ObjectTest.cpp ) TARGET_COMPILE_OPTIONS( ObjectTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ObjectTest ${GTEST_BOTH_LIBRARIES} ) -if( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) -else() - ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp ) - 
TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) -endif() - ADD_EXECUTABLE( TimerTest TimerTest.cpp ) TARGET_COMPILE_OPTIONS( TimerTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TimerTest ${GTEST_BOTH_LIBRARIES} ) @@ -69,6 +60,5 @@ ADD_TEST( AllocatorsTest ${EXECUTABLE_OUTPUT_PATH}/AllocatorsTest${CMAKE_EXECUTA ADD_TEST( FileTest ${EXECUTABLE_OUTPUT_PATH}/FileTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StringTest ${EXECUTABLE_OUTPUT_PATH}/StringTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ObjectTest ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TimerTest ${EXECUTABLE_OUTPUT_PATH}/TimerTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TypeInfoTest ${EXECUTABLE_OUTPUT_PATH}/TypeInfoTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index 3a0643274..51060d770 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -2,16 +2,6 @@ ADD_EXECUTABLE( ListTest ListTest.cpp ) TARGET_COMPILE_OPTIONS( ListTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ListTest ${GTEST_BOTH_LIBRARIES} ) -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cu - OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp ) - TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) -ENDIF( BUILD_CUDA ) - ADD_EXECUTABLE( ArrayTest ArrayTest.cpp ) TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} ) @@ -70,16 +60,6 @@ IF( BUILD_CUDA ) 
TARGET_LINK_LIBRARIES( VectorVerticalOperationsTestCuda ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu - OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp ) - TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) -ENDIF( BUILD_CUDA ) - ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp ) TARGET_COMPILE_OPTIONS( StaticArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( StaticArrayTest ${GTEST_BOTH_LIBRARIES} ) @@ -94,7 +74,6 @@ TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} ) ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( MemoryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/MemoryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -113,7 +92,6 @@ IF( BUILD_CUDA ) ADD_TEST( VectorUnaryOperationsTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorUnaryOperationsTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorVerticalOperationsTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorVerticalOperationsTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ENDIF() -ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticVectorOperationsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git 
a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h index 2faf5ba04..04afb91a4 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h @@ -139,7 +139,7 @@ void test_helper_setValue( DistributedArray& array, BufferView& buffer_view ) { buffer_view[ i - localRange.getBegin() ] = array_view( i ); }; - ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); } TYPED_TEST( DistributedNDArray_1D_test, setValue ) @@ -224,7 +224,7 @@ void test_helper_comparisonOperators( DistributedArray& u, DistributedArray& v, v_view( gi ) = gi; w_view( gi ) = 2 * gi; }; - ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); } TYPED_TEST( DistributedNDArray_1D_test, comparisonOperators ) diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h index 33390a33c..17108509d 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h @@ -181,7 +181,7 @@ void test_helper_comparisonOperators( DistributedArray& u, DistributedArray& v, v_view( q, gi, j ) = gi; w_view( q, gi, j ) = 2 * gi; }; - ParallelFor3D< DeviceType >::exec( (IndexType) 0, localRange.getBegin(), (IndexType) 0, + Algorithms::ParallelFor3D< DeviceType >::exec( (IndexType) 0, localRange.getBegin(), (IndexType) 0, 9, localRange.getEnd(), u.template getSize< 2 >(), kernel ); } diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu index 0a0a83dd8..5a0561955 
100644 --- a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu +++ b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu @@ -2,8 +2,9 @@ #include -#include +#include +using namespace TNL; using namespace TNL::Containers; using std::index_sequence; @@ -37,7 +38,7 @@ void __test_SetThroughView() }; a.setValue(0); - TNL::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); + Algorithms::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); expect_identity( a.getStorageArray() ); } TEST( StaticNDArrayCudaTest, SetThroughView ) @@ -68,7 +69,7 @@ void __test_CopyFromArray() }; a.setValue(0); - TNL::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); + Algorithms::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); expect_identity( a.getStorageArray() ); } TEST( StaticNDArrayCudaTest, CopyFromArray ) -- GitLab From dbfa5d11a76dc1261a6a359d724545b8f90af7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 2 Sep 2019 14:26:54 +0200 Subject: [PATCH 20/35] Serialization in TNL::File: File::save and File::load are specialized by Allocator instead of Device --- Documentation/Examples/FileExampleCuda.cu | 4 +- .../Examples/FileExampleSaveAndLoad.cpp | 6 +-- src/TNL/Containers/Array.h | 4 +- src/TNL/Containers/Array.hpp | 4 +- src/TNL/Containers/ArrayView.hpp | 5 +- src/TNL/Containers/StaticArray.hpp | 4 +- src/TNL/Containers/detail/ArrayIO.h | 29 ++++++----- src/TNL/File.h | 48 ++++++++++--------- src/TNL/File.hpp | 28 ++++++----- src/UnitTests/FileTest.h | 24 +++++----- 10 files changed, 82 insertions(+), 74 deletions(-) diff --git a/Documentation/Examples/FileExampleCuda.cu b/Documentation/Examples/FileExampleCuda.cu index 0cfde8fc8..4411b8c51 100644 --- a/Documentation/Examples/FileExampleCuda.cu +++ b/Documentation/Examples/FileExampleCuda.cu @@ -17,7 +17,7 @@ int main() */ File file; file.open( "file-example-cuda-test-file.tnl", std::ios_base::out | std::ios_base::trunc ); - 
file.save< double, double, Devices::Host >( doubleArray, size ); + file.save< double, double, Allocators::Host< double > >( doubleArray, size ); file.close(); /*** @@ -31,7 +31,7 @@ int main() * Read array from the file to device */ file.open( "file-example-cuda-test-file.tnl", std::ios_base::in ); - file.load< double, double, Devices::Cuda >( deviceArray, size ); + file.load< double, double, Allocators::Cuda< double > >( deviceArray, size ); file.close(); /*** diff --git a/Documentation/Examples/FileExampleSaveAndLoad.cpp b/Documentation/Examples/FileExampleSaveAndLoad.cpp index 00e353218..c232fc3fe 100644 --- a/Documentation/Examples/FileExampleSaveAndLoad.cpp +++ b/Documentation/Examples/FileExampleSaveAndLoad.cpp @@ -18,21 +18,21 @@ int main() */ File file; file.open( "test-file.tnl", std::ios_base::out | std::ios_base::trunc ); - file.save< double, float, Devices::Host >( doubleArray, size ); + file.save< double, float >( doubleArray, size ); file.close(); /*** * Load the array of floats from the file. */ file.open( "test-file.tnl", std::ios_base::in ); - file.load< float, float, Devices::Host >( floatArray, size ); + file.load< float, float >( floatArray, size ); file.close(); /*** * Load the array of floats from the file and convert them to integers. */ file.open( "test-file.tnl", std::ios_base::in ); - file.load< int, float, Devices::Host >( intArray, size ); + file.load< int, float >( intArray, size ); file.close(); /*** diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index a67c8c1e5..2d9848769 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -228,13 +228,13 @@ class Array /** * \brief Returns a \ref String representation of the array type in C++ style, - * where device is always \ref Devices::Host. + * with a placeholder in place of \e Device and \e Allocator. 
*/ static String getSerializationType(); /** * \brief Returns a \ref String representation of the array type in C++ style, - * where device is always \ref Devices::Host. + * with a placeholder in place of \e Device and \e Allocator. */ virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index ce36b27e9..24e3f8b43 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -759,7 +759,7 @@ std::ostream& operator<<( std::ostream& str, const Array< Value, Device, Index, template< typename Value, typename Device, typename Index, typename Allocator > File& operator<<( File& file, const Array< Value, Device, Index, Allocator >& array ) { - using IO = detail::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Index, Allocator >; saveObjectType( file, IO::getSerializationType() ); const Index size = array.getSize(); file.save( &size ); @@ -778,7 +778,7 @@ File& operator<<( File&& file, const Array< Value, Device, Index, Allocator >& a template< typename Value, typename Device, typename Index, typename Allocator > File& operator>>( File& file, Array< Value, Device, Index, Allocator >& array ) { - using IO = detail::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Index, Allocator >; const String type = getObjectType( file ); if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." 
); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index b6915e4e3..c3c39bc10 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "ArrayView.h" @@ -383,7 +384,7 @@ load( const String& fileName ) template< typename Value, typename Device, typename Index > File& operator<<( File& file, const ArrayView< Value, Device, Index > view ) { - using IO = detail::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >; saveObjectType( file, IO::getSerializationType() ); const Index size = view.getSize(); file.save( &size ); @@ -402,7 +403,7 @@ File& operator<<( File&& file, const ArrayView< Value, Device, Index > view ) template< typename Value, typename Device, typename Index > File& operator>>( File& file, ArrayView< Value, Device, Index > view ) { - using IO = detail::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >; const String type = getObjectType( file ); if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." 
); diff --git a/src/TNL/Containers/StaticArray.hpp b/src/TNL/Containers/StaticArray.hpp index 85171e3d7..ed7a2df0b 100644 --- a/src/TNL/Containers/StaticArray.hpp +++ b/src/TNL/Containers/StaticArray.hpp @@ -277,14 +277,14 @@ void StaticArray< Size, Value >::setValue( const ValueType& val ) template< int Size, typename Value > bool StaticArray< Size, Value >::save( File& file ) const { - file.save< Value, Value, Devices::Host >( data, Size ); + file.save( data, Size ); return true; } template< int Size, typename Value > bool StaticArray< Size, Value >::load( File& file) { - file.load< Value, Value, Devices::Host >( data, Size ); + file.load( data, Size ); return true; } diff --git a/src/TNL/Containers/detail/ArrayIO.h b/src/TNL/Containers/detail/ArrayIO.h index 58817a83c..8844a554f 100644 --- a/src/TNL/Containers/detail/ArrayIO.h +++ b/src/TNL/Containers/detail/ArrayIO.h @@ -14,29 +14,29 @@ #include #include +#include namespace TNL { namespace Containers { namespace detail { template< typename Value, - typename Device, typename Index, + typename Allocator, bool Elementwise = std::is_base_of< Object, Value >::value > struct ArrayIO {}; template< typename Value, - typename Device, - typename Index > -struct ArrayIO< Value, Device, Index, true > + typename Index, + typename Allocator > +struct ArrayIO< Value, Index, Allocator, true > { static String getSerializationType() { return String( "Containers::Array< " ) + - TNL::getSerializationType< Value >() + ", " + - TNL::getSerializationType< Devices::Host >() + ", " + - TNL::getSerializationType< Index >() + " >"; + TNL::getSerializationType< Value >() + ", [any_device], " + + TNL::getSerializationType< Index >() + ", [any_allocator] >"; } static void save( File& file, @@ -73,16 +73,15 @@ struct ArrayIO< Value, Device, Index, true > }; template< typename Value, - typename Device, - typename Index > -struct ArrayIO< Value, Device, Index, false > + typename Index, + typename Allocator > +struct ArrayIO< Value, Index, 
Allocator, false > { static String getSerializationType() { return String( "Containers::Array< " ) + - TNL::getSerializationType< Value >() + ", " + - TNL::getSerializationType< Devices::Host >() + ", " + - TNL::getSerializationType< Index >() + " >"; + TNL::getSerializationType< Value >() + ", [any_device], " + + TNL::getSerializationType< Index >() + ", [any_allocator] >"; } static void save( File& file, @@ -93,7 +92,7 @@ struct ArrayIO< Value, Device, Index, false > return; try { - file.save< Value, Value, Device >( data, elements ); + file.save< Value, Value, Allocator >( data, elements ); } catch(...) { @@ -109,7 +108,7 @@ struct ArrayIO< Value, Device, Index, false > return; try { - file.load< Value, Value, Device >( data, elements ); + file.load< Value, Value, Allocator >( data, elements ); } catch(...) { diff --git a/src/TNL/File.h b/src/TNL/File.h index 747f4f4e7..cef110e16 100644 --- a/src/TNL/File.h +++ b/src/TNL/File.h @@ -14,8 +14,8 @@ #include #include -#include -#include +#include +#include namespace TNL { @@ -85,9 +85,9 @@ class File /** * \brief Method for loading data from the file. * - * The data will be stored in \e buffer allocated on device given by the - * \e Device parameter. The data type of the buffer is given by the - * template parameter \e Type. The second template parameter + * The data will be stored in \e buffer which was allocated using the + * allocator of type \e Allocator. The data type of the buffer is given + * by the template parameter \e Type. The second template parameter * \e SourceType defines the type of data in the source file. If both * types are different, on-the-fly conversion takes place during the * data loading. @@ -96,31 +96,31 @@ class File * * \tparam Type type of data to be loaded to the \e buffer. * \tparam SourceType type of data stored on the file, - * \tparam Device device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda. 
+ * \tparam Allocator type of the allocator which was used to allocate \e buffer. * \param buffer Pointer in memory where the elements are loaded and stored after reading. * \param elements number of elements to be loaded from the file. - * + * * The following example shows how to load data directly to GPU. - * + * * \par Example * \include FileExampleCuda.cpp * \par Output * \include FileExampleCuda.out * The following example shows how to do on-the-fly data conversion. - * + * * \par Example * \include FileExampleSaveAndLoad.cpp * \par Output * \include FileExampleSaveAndLoad.out */ - template< typename Type, typename SourceType = Type, typename Device = Devices::Host > + template< typename Type, typename SourceType = Type, typename Allocator = Allocators::Host< Type > > void load( Type* buffer, std::streamsize elements = 1 ); /** * \brief Method for saving data to the file. * - * The data from the \e buffer (with type \e Type) allocated on the device - * \e Device will be saved into the file. \e TargetType defines as what + * The data from the \e buffer (with type \e Type) which was allocated + * using an allocator of type \e Allocator. \e TargetType defines as what * data type the buffer shall be saved. If the type is different from the * data type, on-the-fly data type conversion takes place during the data * saving. @@ -129,40 +129,44 @@ class File * * \tparam Type type of data in the \e buffer. * \tparam TargetType tells as what type data the buffer shall be saved. - * \tparam Device device from where the data are loaded before writing into file. For example \ref Devices::Host or \ref Devices::Cuda. + * \tparam Allocator type of the allocator which was used to allocate \e buffer. * \tparam Index type of index by which the elements are indexed. * \param buffer buffer that is going to be saved to the file. * \param elements number of elements saved to the file. * * See \ref File::load for examples. 
*/ - template< typename Type, typename TargetType = Type, typename Device = Devices::Host > + template< typename Type, typename TargetType = Type, typename Allocator = Allocators::Host< Type > > void save( const Type* buffer, std::streamsize elements = 1 ); protected: + // implementation for all allocators which allocate data accessible from host template< typename Type, typename SourceType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > + typename Allocator, + typename = std::enable_if_t< ! std::is_same< Allocator, Allocators::Cuda< Type > >::value > > void load_impl( Type* buffer, std::streamsize elements ); + // implementation for \ref Allocators::Cuda template< typename Type, typename SourceType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, + typename Allocator, + typename = std::enable_if_t< std::is_same< Allocator, Allocators::Cuda< Type > >::value >, typename = void > void load_impl( Type* buffer, std::streamsize elements ); + // implementation for all allocators which allocate data accessible from host template< typename Type, typename TargetType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > + typename Allocator, + typename = std::enable_if_t< ! 
std::is_same< Allocator, Allocators::Cuda< Type > >::value > > void save_impl( const Type* buffer, std::streamsize elements ); + // implementation for \ref Allocators::Cuda template< typename Type, typename TargetType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, + typename Allocator, + typename = std::enable_if_t< std::is_same< Allocator, Allocators::Cuda< Type > >::value >, typename = void > void save_impl( const Type* buffer, std::streamsize elements ); diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index d00903703..af112e992 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -80,21 +80,23 @@ inline void File::close() template< typename Type, typename SourceType, - typename Device > + typename Allocator > void File::load( Type* buffer, std::streamsize elements ) { + static_assert( std::is_same< Type, typename Allocator::value_type >::value, + "Allocator::value_type must be the same as Type." ); TNL_ASSERT_GE( elements, 0, "Number of elements to load must be non-negative." ); if( ! 
elements ) return; - load_impl< Type, SourceType, Device >( buffer, elements ); + load_impl< Type, SourceType, Allocator >( buffer, elements ); } -// Host +// Host allocators template< typename Type, typename SourceType, - typename Device, + typename Allocator, typename > void File::load_impl( Type* buffer, std::streamsize elements ) { @@ -117,10 +119,10 @@ void File::load_impl( Type* buffer, std::streamsize elements ) } } -// Cuda +// Allocators::Cuda template< typename Type, typename SourceType, - typename Device, + typename Allocator, typename, typename > void File::load_impl( Type* buffer, std::streamsize elements ) { @@ -171,21 +173,23 @@ void File::load_impl( Type* buffer, std::streamsize elements ) template< typename Type, typename TargetType, - typename Device > + typename Allocator > void File::save( const Type* buffer, std::streamsize elements ) { + static_assert( std::is_same< Type, typename Allocator::value_type >::value, + "Allocator::value_type must be the same as Type." ); TNL_ASSERT_GE( elements, 0, "Number of elements to save must be non-negative." ); if( ! 
elements ) return; - save_impl< Type, TargetType, Device >( buffer, elements ); + save_impl< Type, TargetType, Allocator >( buffer, elements ); } -// Host +// Host allocators template< typename Type, typename TargetType, - typename Device, + typename Allocator, typename > void File::save_impl( const Type* buffer, std::streamsize elements ) { @@ -209,10 +213,10 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) } } -// Cuda +// Allocators::Cuda template< typename Type, typename TargetType, - typename Device, + typename Allocator, typename, typename > void File::save_impl( const Type* buffer, std::streamsize elements ) { diff --git a/src/UnitTests/FileTest.h b/src/UnitTests/FileTest.h index f376c60f4..b9f2ee7ef 100644 --- a/src/UnitTests/FileTest.h +++ b/src/UnitTests/FileTest.h @@ -61,15 +61,15 @@ TEST( FileTest, WriteAndReadWithConversion ) int intData[ 3 ]; File file; ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::out | std::ios_base::trunc ) ); - file.save< double, float, Devices::Host >( doubleData, 3 ); + file.save< double, float >( doubleData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::in ) ); - file.load< float, float, Devices::Host >( floatData, 3 ); + file.load< float, float >( floatData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::in ) ); - file.load< int, float, Devices::Host >( intData, 3 ); + file.load< int, float >( intData, 3 ); ASSERT_NO_THROW( file.close() ); EXPECT_NEAR( floatData[ 0 ], 3.14159, 0.0001 ); @@ -112,9 +112,9 @@ TEST( FileTest, WriteAndReadCUDA ) File file; ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out ) ); - file.save< int, int, Devices::Cuda >( cudaIntData ); - file.save< float, float, Devices::Cuda >( cudaFloatData, 3 ); - file.save< const double, double, Devices::Cuda >( cudaConstDoubleData ); + file.save< int, int, Allocators::Cuda >( cudaIntData ); + file.save< 
float, float, Allocators::Cuda >( cudaFloatData, 3 ); + file.save< const double, double, Allocators::Cuda >( cudaConstDoubleData ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) ); @@ -127,9 +127,9 @@ TEST( FileTest, WriteAndReadCUDA ) cudaMalloc( ( void** ) &newCudaIntData, sizeof( int ) ); cudaMalloc( ( void** ) &newCudaFloatData, 3 * sizeof( float ) ); cudaMalloc( ( void** ) &newCudaDoubleData, sizeof( double ) ); - file.load< int, int, Devices::Cuda >( newCudaIntData, 1 ); - file.load< float, float, Devices::Cuda >( newCudaFloatData, 3 ); - file.load< double, double, Devices::Cuda >( newCudaDoubleData, 1 ); + file.load< int, int, Allocators::Cuda >( newCudaIntData, 1 ); + file.load< float, float, Allocators::Cuda >( newCudaFloatData, 3 ); + file.load< double, double, Allocators::Cuda >( newCudaDoubleData, 1 ); cudaMemcpy( &newIntData, newCudaIntData, sizeof( int ), @@ -172,15 +172,15 @@ TEST( FileTest, WriteAndReadCUDAWithConversion ) File file; ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out | std::ios_base::trunc ) ); - file.save< double, float, Devices::Cuda >( cudaConstDoubleData, 3 ); + file.save< double, float, Allocators::Cuda >( cudaConstDoubleData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) ); - file.load< float, float, Devices::Cuda >( cudaFloatData, 3 ); + file.load< float, float, Allocators::Cuda >( cudaFloatData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) ); - file.load< int, float, Devices::Cuda >( cudaIntData, 3 ); + file.load< int, float, Allocators::Cuda >( cudaIntData, 3 ); ASSERT_NO_THROW( file.close() ); cudaMemcpy( floatData, -- GitLab From 7756e2d0c69479158f6bd9a76f4d6694e199acd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 4 Sep 2019 17:17:14 +0200 Subject: [PATCH 21/35] Added 
Devices::Sequential and corresponding specializations in TNL::Algorithms --- src/TNL/Algorithms/CudaReductionKernel.h | 6 +- src/TNL/Algorithms/MemoryOperations.h | 6 +- src/TNL/Algorithms/MemoryOperationsHost.hpp | 6 +- .../Algorithms/MemoryOperationsSequential.hpp | 16 +- src/TNL/Algorithms/Multireduction.h | 30 +++ src/TNL/Algorithms/Multireduction.hpp | 145 +++++++----- src/TNL/Algorithms/ParallelFor.h | 107 +++++---- src/TNL/Algorithms/Reduction.h | 25 ++ src/TNL/Algorithms/Reduction.hpp | 216 ++++++++++-------- src/TNL/Algorithms/Scan.h | 109 ++++++++- src/TNL/Algorithms/Scan.hpp | 133 ++++++++--- src/TNL/Allocators/Default.h | 9 + src/TNL/Containers/NDArray.h | 4 +- src/TNL/Devices/Host.h | 1 - src/TNL/Devices/Sequential.h | 21 ++ 15 files changed, 583 insertions(+), 251 deletions(-) create mode 100644 src/TNL/Devices/Sequential.h diff --git a/src/TNL/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h index c1004a374..b97295e00 100644 --- a/src/TNL/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Algorithms/CudaReductionKernel.h @@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher // Copy result on CPU Result result; - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 ); return result; } @@ -384,8 +384,8 @@ struct CudaReductionKernelLauncher //// // Copy result on CPU std::pair< Index, Result > result; - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 ); return result; } diff --git a/src/TNL/Algorithms/MemoryOperations.h b/src/TNL/Algorithms/MemoryOperations.h index cdbdb7909..59da32402 100644 --- 
a/src/TNL/Algorithms/MemoryOperations.h +++ b/src/TNL/Algorithms/MemoryOperations.h @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include @@ -17,12 +18,11 @@ namespace TNL { namespace Algorithms { -template< typename DestinationExecution > +template< typename DestinationDevice > struct MemoryOperations; -// TODO: change "void" to "Execution::Sequential" template<> -struct MemoryOperations< void > +struct MemoryOperations< Devices::Sequential > { template< typename Element > __cuda_callable__ diff --git a/src/TNL/Algorithms/MemoryOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp index a88688685..cc85975f5 100644 --- a/src/TNL/Algorithms/MemoryOperationsHost.hpp +++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp @@ -93,7 +93,7 @@ copyFromIterator( DestinationElement* destination, SourceIterator first, SourceIterator last ) { - MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last ); + MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last ); } template< typename DestinationElement, @@ -137,7 +137,7 @@ containsValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a match - return MemoryOperations< void >::containsValue( data, size, value ); + return MemoryOperations< Devices::Sequential >::containsValue( data, size, value ); } } @@ -159,7 +159,7 @@ containsOnlyValue( const Element* data, } else { // sequential algorithm can return as soon as it finds a mismatch - return MemoryOperations< void >::containsOnlyValue( data, size, value ); + return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value ); } } diff --git a/src/TNL/Algorithms/MemoryOperationsSequential.hpp b/src/TNL/Algorithms/MemoryOperationsSequential.hpp index e427f00dd..9e5ad25b1 100644 --- a/src/TNL/Algorithms/MemoryOperationsSequential.hpp +++ b/src/TNL/Algorithms/MemoryOperationsSequential.hpp @@ -18,7 +18,7 @@ namespace Algorithms { 
template< typename Element > __cuda_callable__ void -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: setElement( Element* data, const Element& value ) { @@ -28,7 +28,7 @@ setElement( Element* data, template< typename Element > __cuda_callable__ Element -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: getElement( const Element* data ) { return *data; @@ -37,7 +37,7 @@ getElement( const Element* data ) template< typename Element, typename Index > __cuda_callable__ void -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: set( Element* data, const Element& value, const Index size ) @@ -51,7 +51,7 @@ template< typename DestinationElement, typename Index > __cuda_callable__ void -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: copy( DestinationElement* destination, const SourceElement* source, const Index size ) @@ -64,7 +64,7 @@ template< typename DestinationElement, typename Index, typename SourceIterator > void -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, @@ -82,7 +82,7 @@ template< typename Element1, typename Index > __cuda_callable__ bool -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: compare( const Element1* destination, const Element2* source, const Index size ) @@ -97,7 +97,7 @@ template< typename Element, typename Index > __cuda_callable__ bool -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: containsValue( const Element* data, const Index size, const Element& value ) @@ -116,7 +116,7 @@ template< typename Element, typename Index > __cuda_callable__ bool -MemoryOperations< void >:: +MemoryOperations< Devices::Sequential >:: containsOnlyValue( const Element* data, const Index size, const Element& value ) diff --git a/src/TNL/Algorithms/Multireduction.h b/src/TNL/Algorithms/Multireduction.h index 
ac67255fe..8e63fa7ea 100644 --- a/src/TNL/Algorithms/Multireduction.h +++ b/src/TNL/Algorithms/Multireduction.h @@ -14,6 +14,7 @@ #include // reduction functions like std::plus, std::logical_and, std::logical_or etc. +#include #include #include @@ -23,6 +24,35 @@ namespace Algorithms { template< typename Device > struct Multireduction; +template<> +struct Multireduction< Devices::Sequential > +{ + /** + * Parameters: + * zero: starting value for reduction + * dataFetcher: callable object such that `dataFetcher( i, j )` yields + * the i-th value to be reduced from the j-th dataset + * (i = 0,...,size-1; j = 0,...,n-1) + * reduction: callable object representing the reduction operation + * for example, it can be an instance of std::plus, std::logical_and, + * std::logical_or etc. + * size: the size of each dataset + * n: number of datasets to be reduced + * result: output array of size = n + */ + template< typename Result, + typename DataFetcher, + typename Reduction, + typename Index > + static constexpr void + reduce( const Result zero, + DataFetcher dataFetcher, + const Reduction reduction, + const Index size, + const int n, + Result* result ); +}; + template<> struct Multireduction< Devices::Host > { diff --git a/src/TNL/Algorithms/Multireduction.hpp b/src/TNL/Algorithms/Multireduction.hpp index 25b91f026..0bfead287 100644 --- a/src/TNL/Algorithms/Multireduction.hpp +++ b/src/TNL/Algorithms/Multireduction.hpp @@ -29,6 +29,83 @@ namespace TNL { namespace Algorithms { +template< typename Result, + typename DataFetcher, + typename Reduction, + typename Index > +void constexpr +Multireduction< Devices::Sequential >:: +reduce( const Result zero, + DataFetcher dataFetcher, + const Reduction reduction, + const Index size, + const int n, + Result* result ) +{ + TNL_ASSERT_GT( size, 0, "The size of datasets must be positive." ); + TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." 
); + + constexpr int block_size = 128; + const int blocks = size / block_size; + + if( blocks > 1 ) { + // initialize array for unrolled results + // (it is accessed as a row-major matrix with n rows and 4 columns) + Result r[ n * 4 ]; + for( int k = 0; k < n * 4; k++ ) + r[ k ] = zero; + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int k = 0; k < n; k++ ) { + Result* _r = r + 4 * k; + for( int i = 0; i < block_size; i += 4 ) { + _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( offset + i, k ) ); + _r[ 1 ] = reduction( _r[ 1 ], dataFetcher( offset + i + 1, k ) ); + _r[ 2 ] = reduction( _r[ 2 ], dataFetcher( offset + i + 2, k ) ); + _r[ 3 ] = reduction( _r[ 3 ], dataFetcher( offset + i + 3, k ) ); + } + } + } + + // reduction of the last, incomplete block (not unrolled) + for( int k = 0; k < n; k++ ) { + Result* _r = r + 4 * k; + for( Index i = blocks * block_size; i < size; i++ ) + _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( i, k ) ); + } + + // reduction of unrolled results + for( int k = 0; k < n; k++ ) { + Result* _r = r + 4 * k; + _r[ 0 ] = reduction( _r[ 0 ], _r[ 1 ] ); + _r[ 0 ] = reduction( _r[ 0 ], _r[ 2 ] ); + _r[ 0 ] = reduction( _r[ 0 ], _r[ 3 ] ); + + // copy the result into the output parameter + result[ k ] = _r[ 0 ]; + } + } + else { + for( int k = 0; k < n; k++ ) + result[ k ] = zero; + + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int k = 0; k < n; k++ ) { + for( int i = 0; i < block_size; i++ ) + result[ k ] = reduction( result[ k ], dataFetcher( offset + i, k ) ); + } + } + + for( int k = 0; k < n; k++ ) { + for( Index i = blocks * block_size; i < size; i++ ) + result[ k ] = reduction( result[ k ], dataFetcher( i, k ) ); + } + } +} + template< typename Result, typename DataFetcher, typename Reduction, @@ -45,10 +122,10 @@ reduce( const Result zero, TNL_ASSERT_GT( size, 0, "The size of datasets must be positive." 
); TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." ); +#ifdef HAVE_OPENMP constexpr int block_size = 128; const int blocks = size / block_size; -#ifdef HAVE_OPENMP if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) @@ -106,67 +183,9 @@ reduce( const Result zero, } } } - else { -#endif - if( blocks > 1 ) { - // initialize array for unrolled results - // (it is accessed as a row-major matrix with n rows and 4 columns) - Result r[ n * 4 ]; - for( int k = 0; k < n * 4; k++ ) - r[ k ] = zero; - - // main reduction (explicitly unrolled loop) - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int k = 0; k < n; k++ ) { - Result* _r = r + 4 * k; - for( int i = 0; i < block_size; i += 4 ) { - _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( offset + i, k ) ); - _r[ 1 ] = reduction( _r[ 1 ], dataFetcher( offset + i + 1, k ) ); - _r[ 2 ] = reduction( _r[ 2 ], dataFetcher( offset + i + 2, k ) ); - _r[ 3 ] = reduction( _r[ 3 ], dataFetcher( offset + i + 3, k ) ); - } - } - } - - // reduction of the last, incomplete block (not unrolled) - for( int k = 0; k < n; k++ ) { - Result* _r = r + 4 * k; - for( Index i = blocks * block_size; i < size; i++ ) - _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( i, k ) ); - } - - // reduction of unrolled results - for( int k = 0; k < n; k++ ) { - Result* _r = r + 4 * k; - _r[ 0 ] = reduction( _r[ 0 ], _r[ 1 ] ); - _r[ 0 ] = reduction( _r[ 0 ], _r[ 2 ] ); - _r[ 0 ] = reduction( _r[ 0 ], _r[ 3 ] ); - - // copy the result into the output parameter - result[ k ] = _r[ 0 ]; - } - } - else { - for( int k = 0; k < n; k++ ) - result[ k ] = zero; - - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int k = 0; k < n; k++ ) { - for( int i = 0; i < block_size; i++ ) - result[ k ] = reduction( result[ k ], dataFetcher( offset + i, k ) ); - } - } - - for( int k = 0; k < 
n; k++ ) { - for( Index i = blocks * block_size; i < size; i++ ) - result[ k ] = reduction( result[ k ], dataFetcher( i, k ) ); - } - } -#ifdef HAVE_OPENMP - } + else #endif + Multireduction< Devices::Sequential >::reduce( zero, dataFetcher, reduction, size, n, result ); } template< typename Result, @@ -204,7 +223,7 @@ reduce( const Result zero, // transfer the reduced data from device to host std::unique_ptr< Result[] > resultArray{ new Result[ n * reducedSize ] }; - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -215,7 +234,7 @@ reduce( const Result zero, // finish the reduction on the host auto dataFetcherFinish = [&] ( int i, int k ) { return resultArray[ i + k * reducedSize ]; }; - Multireduction< Devices::Host >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult ); + Multireduction< Devices::Sequential >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); diff --git a/src/TNL/Algorithms/ParallelFor.h b/src/TNL/Algorithms/ParallelFor.h index 20e87f222..6d5e917ba 100644 --- a/src/TNL/Algorithms/ParallelFor.h +++ b/src/TNL/Algorithms/ParallelFor.h @@ -10,11 +10,13 @@ #pragma once +#include #include #include #include #include #include +#include #include /**** @@ -33,9 +35,53 @@ namespace Algorithms { enum ParallelForMode { SynchronousMode, AsynchronousMode }; -template< typename Device = Devices::Host, +template< typename Device = Devices::Sequential, ParallelForMode Mode = SynchronousMode > struct ParallelFor +{ + template< typename Index, + typename Function, + typename... FunctionArgs > + static void exec( Index start, Index end, Function f, FunctionArgs... args ) + { + for( Index i = start; i < end; i++ ) + f( i, args... 
); + } +}; + +template< typename Device = Devices::Sequential, + ParallelForMode Mode = SynchronousMode > +struct ParallelFor2D +{ + template< typename Index, + typename Function, + typename... FunctionArgs > + static void exec( Index startX, Index startY, Index endX, Index endY, Function f, FunctionArgs... args ) + { + for( Index j = startY; j < endY; j++ ) + for( Index i = startX; i < endX; i++ ) + f( i, j, args... ); + } +}; + +template< typename Device = Devices::Sequential, + ParallelForMode Mode = SynchronousMode > +struct ParallelFor3D +{ + template< typename Index, + typename Function, + typename... FunctionArgs > + static void exec( Index startX, Index startY, Index startZ, Index endX, Index endY, Index endZ, Function f, FunctionArgs... args ) + { + for( Index k = startZ; k < endZ; k++ ) + for( Index j = startY; j < endY; j++ ) + for( Index i = startX; i < endX; i++ ) + f( i, j, k, args... ); + } +}; + +template< ParallelForMode Mode > +struct ParallelFor< Devices::Host, Mode > { template< typename Index, typename Function, @@ -44,26 +90,23 @@ struct ParallelFor { #ifdef HAVE_OPENMP // Benchmarks show that this is significantly faster compared - // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )' - if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 ) + // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() && end - start > 512 )' + if( Devices::Host::isOMPEnabled() && end - start > 512 ) { -#pragma omp parallel for + #pragma omp parallel for for( Index i = start; i < end; i++ ) f( i, args... ); } else - for( Index i = start; i < end; i++ ) - f( i, args... ); + ParallelFor< Devices::Sequential >::exec( start, end, f, args... ); #else - for( Index i = start; i < end; i++ ) - f( i, args... ); + ParallelFor< Devices::Sequential >::exec( start, end, f, args... 
); #endif } }; -template< typename Device = Devices::Host, - ParallelForMode Mode = SynchronousMode > -struct ParallelFor2D +template< ParallelForMode Mode > +struct ParallelFor2D< Devices::Host, Mode > { template< typename Index, typename Function, @@ -72,30 +115,24 @@ struct ParallelFor2D { #ifdef HAVE_OPENMP // Benchmarks show that this is significantly faster compared - // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )' - if( TNL::Devices::Host::isOMPEnabled() ) + // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() )' + if( Devices::Host::isOMPEnabled() ) { -#pragma omp parallel for - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, args... ); - } - else { + #pragma omp parallel for for( Index j = startY; j < endY; j++ ) for( Index i = startX; i < endX; i++ ) f( i, j, args... ); } + else + ParallelFor2D< Devices::Sequential >::exec( startX, startY, endX, endY, f, args... ); #else - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, args... ); + ParallelFor2D< Devices::Sequential >::exec( startX, startY, endX, endY, f, args... ); #endif } }; -template< typename Device = Devices::Host, - ParallelForMode Mode = SynchronousMode > -struct ParallelFor3D +template< ParallelForMode Mode > +struct ParallelFor3D< Devices::Host, Mode > { template< typename Index, typename Function, @@ -104,27 +141,19 @@ struct ParallelFor3D { #ifdef HAVE_OPENMP // Benchmarks show that this is significantly faster compared - // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )' - if( TNL::Devices::Host::isOMPEnabled() ) - { -#pragma omp parallel for collapse(2) - for( Index k = startZ; k < endZ; k++ ) - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, k, args... 
); - } - else + // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() )' + if( Devices::Host::isOMPEnabled() ) { + #pragma omp parallel for collapse(2) for( Index k = startZ; k < endZ; k++ ) for( Index j = startY; j < endY; j++ ) for( Index i = startX; i < endX; i++ ) f( i, j, k, args... ); } + else + ParallelFor3D< Devices::Sequential >::exec( startX, startY, startZ, endX, endY, endZ, f, args... ); #else - for( Index k = startZ; k < endZ; k++ ) - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, k, args... ); + ParallelFor3D< Devices::Sequential >::exec( startX, startY, startZ, endX, endY, endZ, f, args... ); #endif } }; diff --git a/src/TNL/Algorithms/Reduction.h b/src/TNL/Algorithms/Reduction.h index e77fa1206..c0d62684d 100644 --- a/src/TNL/Algorithms/Reduction.h +++ b/src/TNL/Algorithms/Reduction.h @@ -15,6 +15,7 @@ #include // std::pair #include // reduction functions like std::plus, std::logical_and, std::logical_or etc. +#include #include #include @@ -36,6 +37,30 @@ namespace Algorithms { template< typename Device > struct Reduction; +template<> +struct Reduction< Devices::Sequential > +{ + template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > + static constexpr Result + reduce( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ); + + template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > + static constexpr std::pair< Index, Result > + reduceWithArgument( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ); +}; + template<> struct Reduction< Devices::Host > { diff --git a/src/TNL/Algorithms/Reduction.hpp b/src/TNL/Algorithms/Reduction.hpp index 9fd56576e..b07f04445 100644 --- a/src/TNL/Algorithms/Reduction.hpp +++ b/src/TNL/Algorithms/Reduction.hpp @@ -17,8 +17,8 @@ //#define CUDA_REDUCTION_PROFILING 
#include -#include #include +#include #ifdef CUDA_REDUCTION_PROFILING #include @@ -35,8 +35,115 @@ namespace Algorithms { */ static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256; -//// -// Reduction on host +template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > +constexpr Result +Reduction< Devices::Sequential >:: +reduce( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ + constexpr int block_size = 128; + const int blocks = size / block_size; + + if( blocks > 1 ) { + // initialize array for unrolled results + Result r[ 4 ] = { zero, zero, zero, zero }; + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int i = 0; i < block_size; i += 4 ) { + r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); + r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); + r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); + r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); + } + } + + // reduction of the last, incomplete block (not unrolled) + for( Index i = blocks * block_size; i < size; i++ ) + r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); + + // reduction of unrolled results + r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); + r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); + r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); + return r[ 0 ]; + } + else { + Result result = zero; + for( Index i = 0; i < size; i++ ) + result = reduction( result, dataFetcher( i ) ); + return result; + } +} + +template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > +constexpr std::pair< Index, Result > +Reduction< Devices::Sequential >:: +reduceWithArgument( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ + constexpr int block_size = 128; + const int blocks = size / block_size; + + 
if( blocks > 1 ) { + // initialize array for unrolled results + Index arg[ 4 ] = { 0, 0, 0, 0 }; + Result r[ 4 ] = { zero, zero, zero, zero }; + bool initialized( false ); + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int i = 0; i < block_size; i += 4 ) { + if( ! initialized ) + { + arg[ 0 ] = offset + i; + arg[ 1 ] = offset + i + 1; + arg[ 2 ] = offset + i + 2; + arg[ 3 ] = offset + i + 3; + r[ 0 ] = dataFetcher( offset + i ); + r[ 1 ] = dataFetcher( offset + i + 1 ); + r[ 2 ] = dataFetcher( offset + i + 2 ); + r[ 3 ] = dataFetcher( offset + i + 3 ); + initialized = true; + continue; + } + reduction( arg[ 0 ], offset + i, r[ 0 ], dataFetcher( offset + i ) ); + reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) ); + reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) ); + reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) ); + } + } + + // reduction of the last, incomplete block (not unrolled) + for( Index i = blocks * block_size; i < size; i++ ) + reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) ); + + // reduction of unrolled results + reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] ); + reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] ); + reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] ); + return std::make_pair( arg[ 0 ], r[ 0 ] ); + } + else { + std::pair< Index, Result > result( 0, dataFetcher( 0 ) ); + for( Index i = 1; i < size; i++ ) + reduction( result.first, i, result.second, dataFetcher( i ) ); + return result; + } +} + template< typename Index, typename Result, typename ReductionOperation, @@ -48,10 +155,10 @@ reduce( const Index size, DataFetcher& dataFetcher, const Result& zero ) { +#ifdef HAVE_OPENMP constexpr int block_size = 128; const int blocks = size / block_size; -#ifdef HAVE_OPENMP if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable Result result = zero; @@ -92,42 
+199,9 @@ reduce( const Index size, } return result; } - else { -#endif - if( blocks > 1 ) { - // initialize array for unrolled results - Result r[ 4 ] = { zero, zero, zero, zero }; - - // main reduction (explicitly unrolled loop) - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int i = 0; i < block_size; i += 4 ) { - r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); - r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); - r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); - r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); - } - } - - // reduction of the last, incomplete block (not unrolled) - for( Index i = blocks * block_size; i < size; i++ ) - r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); - - // reduction of unrolled results - r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); - r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); - r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); - return r[ 0 ]; - } - else { - Result result = zero; - for( Index i = 0; i < size; i++ ) - result = reduction( result, dataFetcher( i ) ); - return result; - } -#ifdef HAVE_OPENMP - } + else #endif + return Reduction< Devices::Sequential >::reduce( size, reduction, dataFetcher, zero ); } template< typename Index, @@ -141,10 +215,10 @@ reduceWithArgument( const Index size, DataFetcher& dataFetcher, const Result& zero ) { +#ifdef HAVE_OPENMP constexpr int block_size = 128; const int blocks = size / block_size; -#ifdef HAVE_OPENMP if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable std::pair< Index, Result > result( -1, zero ); @@ -201,57 +275,9 @@ reduceWithArgument( const Index size, } return result; } - else { -#endif - if( blocks > 1 ) { - // initialize array for unrolled results - Index arg[ 4 ] = { 0, 0, 0, 0 }; - Result r[ 4 ] = { zero, zero, zero, zero }; - bool initialized( false ); - - // main reduction (explicitly unrolled loop) - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int 
i = 0; i < block_size; i += 4 ) { - if( ! initialized ) - { - arg[ 0 ] = offset + i; - arg[ 1 ] = offset + i + 1; - arg[ 2 ] = offset + i + 2; - arg[ 3 ] = offset + i + 3; - r[ 0 ] = dataFetcher( offset + i ); - r[ 1 ] = dataFetcher( offset + i + 1 ); - r[ 2 ] = dataFetcher( offset + i + 2 ); - r[ 3 ] = dataFetcher( offset + i + 3 ); - initialized = true; - continue; - } - reduction( arg[ 0 ], offset + i, r[ 0 ], dataFetcher( offset + i ) ); - reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) ); - reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) ); - reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) ); - } - } - - // reduction of the last, incomplete block (not unrolled) - for( Index i = blocks * block_size; i < size; i++ ) - reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) ); - - // reduction of unrolled results - reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] ); - reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] ); - reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] ); - return std::make_pair( arg[ 0 ], r[ 0 ] ); - } - else { - std::pair< Index, Result > result( 0, dataFetcher( 0 ) ); - for( Index i = 1; i < size; i++ ) - reduction( result.first, i, result.second, dataFetcher( i ) ); - return result; - } -#ifdef HAVE_OPENMP - } + else #endif + return Reduction< Devices::Sequential >::reduceWithArgument( size, reduction, dataFetcher, zero ); } template< typename Index, @@ -309,7 +335,7 @@ reduce( const Index size, new Result[ reducedSize ] #endif }; - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -320,7 +346,7 @@ reduce( const Index size, // finish the reduction on the host auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; - const Result result = Reduction< Devices::Host 
>::reduce( reducedSize, reduction, fetch, zero ); + const Result result = Reduction< Devices::Sequential >::reduce( reducedSize, reduction, fetch, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -414,8 +440,8 @@ reduceWithArgument( const Index size, new Index[ reducedSize ] #endif }; - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); - MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -426,7 +452,7 @@ reduceWithArgument( const Index size, // finish the reduction on the host // auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; -// const Result result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero ); +// const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero ); for( Index i = 1; i < reducedSize; i++ ) reduction( indexArray[ 0 ], indexArray[ i ], resultArray[ 0 ], resultArray[ i ] ); diff --git a/src/TNL/Algorithms/Scan.h b/src/TNL/Algorithms/Scan.h index 2f2275c53..81a5d2f7e 100644 --- a/src/TNL/Algorithms/Scan.h +++ b/src/TNL/Algorithms/Scan.h @@ -12,6 +12,7 @@ #pragma once +#include #include #include @@ -96,11 +97,71 @@ template< typename Device, struct SegmentedScan; +template< ScanType Type > +struct Scan< Devices::Sequential, Type > +{ + /** + * \brief Computes scan (prefix sum) sequentially. + * + * \tparam Vector type vector being used for the scan. 
+ * \tparam Reduction lambda function defining the reduction operation + * + * \param v input vector, the result of scan is stored in the same vector + * \param begin the first element in the array to be scanned + * \param end the last element in the array to be scanned + * \param reduction lambda function implementing the reduction operation + * \param zero is the idempotent element for the reduction operation, i.e. element which + * does not change the result of the reduction. + * + * The reduction lambda function takes two variables which are supposed to be reduced: + * + * ``` + * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; + * ``` + * + * \par Example + * + * \include ReductionAndScan/ScanExample.cpp + * + * \par Output + * + * \include ScanExample.out + */ + template< typename Vector, + typename Reduction > + static void + perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ); + + template< typename Vector, + typename Reduction > + static auto + performFirstPhase( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ); + + template< typename Vector, + typename BlockShifts, + typename Reduction > + static void + performSecondPhase( Vector& v, + const BlockShifts& blockShifts, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType shift ); +}; + template< ScanType Type > struct Scan< Devices::Host, Type > { /** - * \brief Computes scan (prefix sum) on CPU. + * \brief Computes scan (prefix sum) using OpenMP. * * \tparam Vector type vector being used for the scan. 
* \tparam Reduction lambda function defining the reduction operation @@ -216,11 +277,55 @@ struct Scan< Devices::Cuda, Type > const typename Vector::RealType shift ); }; +template< ScanType Type > +struct SegmentedScan< Devices::Sequential, Type > +{ + /** + * \brief Computes segmented scan (prefix sum) sequentially. + * + * \tparam Vector type vector being used for the scan. + * \tparam Reduction lambda function defining the reduction operation + * \tparam Flags array type containing zeros and ones defining the segments begining + * + * \param v input vector, the result of scan is stored in the same vector + * \param flags is an array with zeros and ones defining the segments begining + * \param begin the first element in the array to be scanned + * \param end the last element in the array to be scanned + * \param reduction lambda function implementing the reduction operation + * \param zero is the idempotent element for the reduction operation, i.e. element which + * does not change the result of the reduction. + * + * The reduction lambda function takes two variables which are supposed to be reduced: + * + * ``` + * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; + * ``` + * + * \par Example + * + * \include ReductionAndScan/SegmentedScanExample.cpp + * + * \par Output + * + * \include SegmentedScanExample.out + */ + template< typename Vector, + typename Reduction, + typename Flags > + static void + perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ); +}; + template< ScanType Type > struct SegmentedScan< Devices::Host, Type > { /** - * \brief Computes segmented scan (prefix sum) on CPU. + * \brief Computes segmented scan (prefix sum) using OpenMP. * * \tparam Vector type vector being used for the scan. 
* \tparam Reduction lambda function defining the reduction operation diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp index bb2288c6a..7b6d31ece 100644 --- a/src/TNL/Algorithms/Scan.hpp +++ b/src/TNL/Algorithms/Scan.hpp @@ -24,6 +24,78 @@ namespace TNL { namespace Algorithms { +template< ScanType Type > + template< typename Vector, + typename Reduction > +void +Scan< Devices::Sequential, Type >:: +perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ) +{ + // sequential prefix-sum does not need a second phase + performFirstPhase( v, begin, end, reduction, zero ); +} + +template< ScanType Type > + template< typename Vector, + typename Reduction > +auto +Scan< Devices::Sequential, Type >:: +performFirstPhase( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + // FIXME: StaticArray does not have getElement() which is used in DistributedScan +// return Containers::StaticArray< 1, RealType > block_sums; + Containers::Array< RealType, Devices::Host > block_sums( 1 ); + block_sums[ 0 ] = zero; + + if( Type == ScanType::Inclusive ) { + for( IndexType i = begin + 1; i < end; i++ ) + v[ i ] = reduction( v[ i ], v[ i - 1 ] ); + block_sums[ 0 ] = v[ end - 1 ]; + } + else // Exclusive prefix sum + { + RealType aux = zero; + for( IndexType i = begin; i < end; i++ ) { + const RealType x = v[ i ]; + v[ i ] = aux; + aux = reduction( aux, x ); + } + block_sums[ 0 ] = aux; + } + + return block_sums; +} + +template< ScanType Type > + template< typename Vector, + typename BlockShifts, + typename Reduction > +void +Scan< Devices::Sequential, Type >:: +performSecondPhase( Vector& v, + const BlockShifts& blockShifts, + const typename 
Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType shift ) +{ + using IndexType = typename Vector::IndexType; + + for( IndexType i = begin; i < end; i++ ) + v[ i ] = reduction( v[ i ], shift ); +} + template< ScanType Type > template< typename Vector, typename Reduction > @@ -39,8 +111,7 @@ perform( Vector& v, const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero ); performSecondPhase( v, blockShifts, begin, end, reduction, zero ); #else - // sequential prefix-sum does not need a second phase - performFirstPhase( v, begin, end, reduction, zero ); + Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero ); #endif } @@ -55,12 +126,12 @@ performFirstPhase( Vector& v, const Reduction& reduction, const typename Vector::RealType zero ) { +#ifdef HAVE_OPENMP using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; -#ifdef HAVE_OPENMP const int threads = Devices::Host::getMaxThreadsCount(); - Containers::Array< RealType, Devices::Host > block_sums( threads + 1 ); + Containers::Array< RealType > block_sums( threads + 1 ); block_sums[ 0 ] = zero; #pragma omp parallel num_threads(threads) @@ -98,28 +169,7 @@ performFirstPhase( Vector& v, // block_sums now contains shift values for each block - to be used in the second phase return block_sums; #else - // FIXME: StaticArray does not have getElement() which is used in DistributedScan -// return Containers::StaticArray< 1, RealType > block_sums; - Containers::Array< RealType, Devices::Host > block_sums( 1 ); - block_sums[ 0 ] = zero; - - if( Type == ScanType::Inclusive ) { - for( IndexType i = begin + 1; i < end; i++ ) - v[ i ] = reduction( v[ i ], v[ i - 1 ] ); - block_sums[ 0 ] = v[ end - 1 ]; - } - else // Exclusive prefix sum - { - RealType aux = zero; - for( IndexType i = begin; i < end; i++ ) { - const RealType x = v[ i ]; - v[ i ] = aux; - aux = reduction( aux, x ); - } - 
block_sums[ 0 ] = aux; - } - - return block_sums; + return Scan< Devices::Sequential, Type >::performFirstPhase( v, begin, end, reduction, zero ); #endif } @@ -136,10 +186,10 @@ performSecondPhase( Vector& v, const Reduction& reduction, const typename Vector::RealType shift ) { +#ifdef HAVE_OPENMP using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; -#ifdef HAVE_OPENMP const int threads = blockShifts.getSize() - 1; // launch exactly the same number of threads as in the first phase @@ -154,8 +204,7 @@ performSecondPhase( Vector& v, v[ i ] = reduction( v[ i ], offset ); } #else - for( IndexType i = begin; i < end; i++ ) - v[ i ] = reduction( v[ i ], shift ); + Scan< Devices::Sequential, Type >::performSecondPhase( v, blockShifts, begin, end, reduction, shift ); #endif } @@ -245,7 +294,7 @@ template< ScanType Type > typename Reduction, typename Flags > void -SegmentedScan< Devices::Host, Type >:: +SegmentedScan< Devices::Sequential, Type >:: perform( Vector& v, Flags& flags, const typename Vector::IndexType begin, @@ -256,7 +305,6 @@ perform( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - // TODO: parallelize with OpenMP if( Type == ScanType::Inclusive ) { for( IndexType i = begin + 1; i < end; i++ ) @@ -278,6 +326,27 @@ perform( Vector& v, } } +template< ScanType Type > + template< typename Vector, + typename Reduction, + typename Flags > +void +SegmentedScan< Devices::Host, Type >:: +perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ) +{ +#ifdef HAVE_OPENMP + // TODO: parallelize with OpenMP + SegmentedScan< Devices::Sequential, Type >::perform( v, flags, begin, end, reduction, zero ); +#else + SegmentedScan< Devices::Sequential, Type >::perform( v, flags, begin, end, reduction, zero ); +#endif +} + template< ScanType Type > template< 
typename Vector, typename Reduction, @@ -295,7 +364,7 @@ perform( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - throw Exceptions::NotImplementedError( "Segmented prefix sum is not implemented for CUDA." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) is not implemented for CUDA." ); #else throw Exceptions::CudaSupportMissing(); #endif diff --git a/src/TNL/Allocators/Default.h b/src/TNL/Allocators/Default.h index eed5c193b..109539d0c 100644 --- a/src/TNL/Allocators/Default.h +++ b/src/TNL/Allocators/Default.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -27,6 +28,14 @@ namespace Allocators { template< typename Device > struct Default; +//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Sequential. +template<> +struct Default< Devices::Sequential > +{ + template< typename T > + using Allocator = Allocators::Host< T >; +}; + //! Sets \ref Allocators::Host as the default allocator for \ref Devices::Host. template<> struct Default< Devices::Host > diff --git a/src/TNL/Containers/NDArray.h b/src/TNL/Containers/NDArray.h index 8472f4d71..3cbc8a7bc 100644 --- a/src/TNL/Containers/NDArray.h +++ b/src/TNL/Containers/NDArray.h @@ -352,13 +352,13 @@ class StaticNDArray SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > >, - void > + Devices::Sequential > { using Base = NDArrayStorage< StaticArray< __ndarray_impl::StaticStorageSizeGetter< SizesHolder >::get(), Value >, SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > >, - void >; + Devices::Sequential >; static_assert( __ndarray_impl::StaticStorageSizeGetter< SizesHolder >::get() > 0, "All dimensions of a static array must to be positive." ); diff --git a/src/TNL/Devices/Host.h b/src/TNL/Devices/Host.h index 115607583..4af7892ec 100644 --- a/src/TNL/Devices/Host.h +++ b/src/TNL/Devices/Host.h @@ -19,7 +19,6 @@ #endif namespace TNL { -//! 
\brief Namespace for TNL execution models namespace Devices { class Host diff --git a/src/TNL/Devices/Sequential.h b/src/TNL/Devices/Sequential.h new file mode 100644 index 000000000..f00660f19 --- /dev/null +++ b/src/TNL/Devices/Sequential.h @@ -0,0 +1,21 @@ +/*************************************************************************** + Sequential.h - description + ------------------- + begin : Aug 17, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +//! \brief Namespace for TNL execution models +namespace Devices { + +struct Sequential +{}; + +} // namespace Devices +} // namespace TNL -- GitLab From 058aa8a9919f91916ff204903f7db5e76c97ba07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 25 Aug 2019 16:04:25 +0200 Subject: [PATCH 22/35] Enforce builds without (more or less) any warnings --- .gitlab-ci.yml | 2 ++ CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a26124c8a..1c8f367c0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,6 +45,8 @@ stages: fi - export CTEST_OUTPUT_ON_FAILURE=1 - export CTEST_PARALLEL_LEVEL=4 + # enforce (more or less) warning-free builds + - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized" - mkdir -p "./builddir/$CI_JOB_NAME" - pushd "./builddir/$CI_JOB_NAME" - cmake ../.. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d7e0cedf..78c7f3dcd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options -set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) +set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) # pass -rdynamic only in Debug mode -- GitLab From 3ddc54a6295f74f3d9d330589356c785c035ba51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 11 Oct 2019 13:20:40 +0200 Subject: [PATCH 23/35] Fixed handling of --build parameter in the install script --- build | 7 ++++- install | 82 ++++++++++++++++++++++++++------------------------------- 2 files changed, 43 insertions(+), 46 deletions(-) diff --git a/build b/build index bcd590860..914c65b19 100755 --- a/build +++ b/build @@ -108,7 +108,12 @@ else export CC=gcc fi -if hash ninja 2>/dev/null; then +if [[ ! $(command -v cmake) ]]; then + echo "Error: cmake is not installed. See http://www.cmake.org/download/" >&2 + exit 1 +fi + +if [[ $(command -v ninja) ]]; then generator=Ninja make=ninja check_file="build.ninja" diff --git a/install b/install index 6f0770367..fe138dfaa 100755 --- a/install +++ b/install @@ -1,59 +1,51 @@ #!/bin/bash +set -e + BUILD_DEBUG="yes" BUILD_RELEASE="yes" OPTIONS="" -CMAKE_TEST=`which cmake` -if test x${CMAKE_TEST} = "x"; -then - echo "Cmake is not installed on your system. 
Please install it by:" - echo "" - echo " sudo apt-get install cmake on Ubuntu and Debian based systems" - echo " sudo yum install cmake on RedHat, Fedora or CentOS" - echo " sudo zypper install cmake on OpenSuse" - echo "" - echo "You may also install it from the source code at:" - echo " http://www.cmake.org/download/" - exit 1 -fi - -for option in "$@" -do - case $option in - --no-debug ) BUILD_DEBUG="no" ;; - --no-release ) BUILD_RELEASE="no" ;; - * ) OPTIONS="${OPTIONS} ${option}" ;; - esac +for option in "$@"; do + case $option in + --no-debug) + BUILD_DEBUG="no" + ;; + --no-release) + BUILD_RELEASE="no" + ;; + --build=* ) + BUILD="${option#*=}" + if [[ "$BUILD" != "Release" ]]; then + BUILD_RELEASE="no" + fi + if [[ "$BUILD" != "Debug" ]]; then + BUILD_DEBUG="no" + fi + ;; + *) + OPTIONS="${OPTIONS} ${option}" + ;; + esac done -if test ${BUILD_DEBUG} = "yes"; -then - if [ ! -d Debug ]; - then - mkdir Debug - fi - cd Debug - if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS} - then - exit 1 - fi - cd .. +if [[ ${BUILD_DEBUG} == "yes" ]]; then + if [[ ! -d Debug ]]; then + mkdir Debug + fi + pushd Debug + ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS} + popd fi -if test ${BUILD_RELEASE} = "yes"; -then - if [ ! -d Release ]; - then - mkdir Release - fi - cd Release - if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; - then - exit 1 - fi - cd .. +if [[ ${BUILD_RELEASE} == "yes" ]]; then + if [[ ! -d Release ]]; then + mkdir Release + fi + pushd Release + ../build --root-dir=.. 
--build=Release --install=yes ${OPTIONS}; + popd fi -- GitLab From 1b7361a959553a5f55eb0ac4efdd22969944fb03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 11 Oct 2019 17:37:59 +0200 Subject: [PATCH 24/35] Removed Containers::List because it has no benefits over std::list --- Documentation/Examples/StringExample.cpp | 1 - Documentation/Tutorials/Vectors/Reduction.cpp | 1 - src/Examples/CMakeLists.txt | 1 - src/Examples/ListExample.cpp | 24 -- src/TNL/Containers/List.h | 226 ------------ src/TNL/Containers/List_impl.h | 346 ------------------ src/TNL/Images/DicomSeries.h | 5 +- src/TNL/Images/DicomSeries_impl.h | 23 +- src/Tools/tnl-dicom-reader.cpp | 4 +- src/UnitTests/Containers/CMakeLists.txt | 5 - src/UnitTests/Containers/ListTest.cpp | 141 ------- 11 files changed, 17 insertions(+), 760 deletions(-) delete mode 100644 src/Examples/ListExample.cpp delete mode 100644 src/TNL/Containers/List.h delete mode 100644 src/TNL/Containers/List_impl.h delete mode 100644 src/UnitTests/Containers/ListTest.cpp diff --git a/Documentation/Examples/StringExample.cpp b/Documentation/Examples/StringExample.cpp index 609e2a269..a86182d65 100644 --- a/Documentation/Examples/StringExample.cpp +++ b/Documentation/Examples/StringExample.cpp @@ -1,6 +1,5 @@ #include #include -#include #include using namespace TNL; diff --git a/Documentation/Tutorials/Vectors/Reduction.cpp b/Documentation/Tutorials/Vectors/Reduction.cpp index 1d76d8d04..33768b07f 100644 --- a/Documentation/Tutorials/Vectors/Reduction.cpp +++ b/Documentation/Tutorials/Vectors/Reduction.cpp @@ -24,7 +24,6 @@ void expressions() b.evaluate( [] __cuda_callable__ ( int i )->RealType { return i - 5.0; } ); c = -5; - int arg; std::cout << "a = " << a << std::endl; std::cout << "b = " << b << std::endl; std::cout << "c = " << c << std::endl; diff --git a/src/Examples/CMakeLists.txt b/src/Examples/CMakeLists.txt index 403809571..493f537d1 100644 --- a/src/Examples/CMakeLists.txt +++ 
b/src/Examples/CMakeLists.txt @@ -12,6 +12,5 @@ add_subdirectory( flow-vl ) ADD_EXECUTABLE( ConfigDescriptionExample ConfigDescriptionExample.cpp ) -ADD_EXECUTABLE( ListExample ListExample.cpp ) ADD_EXECUTABLE( LoggerExample LoggerExample.cpp ) ADD_EXECUTABLE( MathExample MathExample.cpp ) diff --git a/src/Examples/ListExample.cpp b/src/Examples/ListExample.cpp deleted file mode 100644 index 7196dc759..000000000 --- a/src/Examples/ListExample.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include -#include -#include - -using namespace TNL; -using namespace std; - -int main() -{ - Containers::List< int > lst; - lst.isEmpty(); - - lst.Append(1); - lst.Append(3); - - lst.isEmpty(); - lst.getSize(); - - lst.Insert(2,1); - - Containers::Array array; - lst.toArray(array); -} \ No newline at end of file diff --git a/src/TNL/Containers/List.h b/src/TNL/Containers/List.h deleted file mode 100644 index 3558a1b2c..000000000 --- a/src/TNL/Containers/List.h +++ /dev/null @@ -1,226 +0,0 @@ -/*************************************************************************** - List.h - description - ------------------- - begin : Sat, 10 Apr 2004 15:58:51 +0100 - copyright : (C) 2004 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -#include -#include -#include -#include - -namespace TNL { -namespace Containers { - -template< class T > class ListDataElement; - -/// \brief Template for double linked lists -/*! To acces elements in the list one can use method getSize() and - operator[](). To add elements there are methods Append(), - Prepend() and Insert() to insert an element at given - position. To erase particular element there is method - Erase() taking the element position. To erase all elements - there is method reset(). 
There are also alternatives DeepErase() - and DeepEraseAll() to free dynamicaly allocated data inside the - data elements. - The list stores pointer to last accesed element so if one goes - seqeuntialy through the list there is no inefficiency. The - accesing algorithm is also able to deside whether to start from - the last accesed position or from the begining resp. from the end - of the list. So with common use one does not need to worry about - efficiency :-) - */ -template< class T > class List -{ - public: - typedef T ValueType; - - /// \brief Basic constructor. - /// - /// Constructs an empty list. - List(); - - /// \brief Copy constructor. - /// - /// Construct a copy of \e list. - /// \param list Name of another list. - List( const List& list ); - - /// \brief Destructor. - /// - /// Destroys the list. References to the values in the list become invalid. - ~List(); - - /// Returns \e true if the list contains no items, otherwise returns \e false. - bool isEmpty() const; - - /// Returns number of items in the list. - int getSize() const; - - /// Indexing operator. - T& operator[] ( const int& ind ); - - /// Indexing operator for constant instances. - const T& operator[] ( const int& ind ) const; - - const List& operator = ( const List& lst ); - - bool operator == ( const List& lst ) const; - - bool operator != ( const List& lst ) const; - - /// \brief Appends new data element. - /// - /// Inserts \e data at the end of the list. - bool Append( const T& data ); - - /// \brief Prepends new data element. - /// - /// Inserts \e data at the beginning of the list. - bool Prepend( const T& data ); - - /// \brief Inserts new data element at given position. - /// - /// Inserts \e data at index position \e ind in the list. - bool Insert( const T& data, const int& ind ); - - /// Appends copy of another list. - /// - /// \param lst Name of another list. - bool AppendList( const List< T >& lst ); - - /// Prepends copy of another list. 
- /// - /// \param lst Name of another list. - bool PrependList( const List< T >& lst ); - - /// Transforms list to an \e array. - template< typename Array > - void toArray( Array& array ); - - /*** - * \brief Checks if there is an element with value \e v in given array. - * - * \param v Reference to a value. - */ - bool containsValue( const T& v ) const; - - /// Erases data element at given position. - /// - /// \param ind Index of the data element one chooses to remove. - void Erase( const int& ind ); - - /// Erases data element with contained data at given position. - /// - /// \param ind Index of the data element one chooses to remove. - void DeepErase( const int& ind ); - - /// Erases all data elements. - void reset(); - - /// \brief Erases all data elements with contained data. - /// - /// Frees dynamicaly allocated data inside the data elements - void DeepEraseAll(); - - /// Saves the list in binary format. - /// - /// \param file Name of file. - bool Save( File& file ) const; - - /// Saves the list in binary format using method save of type T. - /// - /// \param file Name of file. - bool DeepSave( File& file ) const; - - /// Loads the list from file. - /// - /// \param file Name of file. - bool Load( File& file ); - - /// Loads the list from file using method Load of the type T. - /// - /// \param file Name of file. - bool DeepLoad( File& file ); - - protected: - /// Pointer to the first element. - ListDataElement< T >* first; - - /// Pointer to the last element. - /*! We use pointer to last element while adding new element to keep order of elements - */ - ListDataElement< T >* last; - - /// List size. - int size; - - /// Iterator. - mutable ListDataElement< T >* iterator; - - /// Iterator index. - mutable int index; -}; - -template< typename T > std::ostream& operator << ( std::ostream& str, const List< T >& list ); - -//! Data element for List and mStack -template< class T > class ListDataElement -{ - //! Main data - T data; - - //! 
Pointer to the next element - ListDataElement< T >* next; - - //! Pointer to the previous element - ListDataElement< T >* previous; - - public: - //! Basic constructor - ListDataElement() - : next( 0 ), - previous( 0 ){}; - - //! Constructor with given data and possibly pointer to next element - ListDataElement( const T& dt, - ListDataElement< T >* prv = 0, - ListDataElement< T >* nxt = 0 ) - : data( dt ), - next( nxt ), - previous( prv ){}; - - //! Destructor - ~ListDataElement(){}; - - //! Return data for non-const instances - T& Data() { return data; }; - - //! Return data for const instances - const T& Data() const { return data; }; - - //! Return pointer to the next element for non-const instances - ListDataElement< T >*& Next() { return next; }; - - //! Return pointer to the next element for const instances - const ListDataElement< T >* Next() const { return next; }; - - //! Return pointer to the previous element for non-const instances - ListDataElement< T >*& Previous() { return previous; }; - - //! 
Return pointer to the previous element for const instances - const ListDataElement< T >* Previous() const { return previous; }; -}; - -} // namespace Containers -} // namespace TNL - -#include diff --git a/src/TNL/Containers/List_impl.h b/src/TNL/Containers/List_impl.h deleted file mode 100644 index 3068de315..000000000 --- a/src/TNL/Containers/List_impl.h +++ /dev/null @@ -1,346 +0,0 @@ -/*************************************************************************** - List_impl.h - description - ------------------- - begin : Mar, 5 Apr 2016 12:46 PM - copyright : (C) 2016 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include - -namespace TNL { -namespace Containers { - -template< typename T > -List< T >::List() - : first( 0 ), last( 0 ), size( 0 ), iterator( 0 ), index( 0 ) -{ -} - -template< typename T > -List< T >::List( const List& list ) - : first( 0 ), last( 0 ), size( 0 ), iterator( 0 ), index( 0 ) -{ - AppendList( list ); -} - -template< typename T > -List< T >::~List() -{ - reset(); -} - -template< typename T > -bool List< T >::isEmpty() const -{ - return ! size; -} - -template< typename T > -int List< T >::getSize() const -{ - return size; -} - -template< typename T > -T& List< T >::operator[]( const int& ind ) -{ - TNL_ASSERT( ind < size, ); - int iter_dist = TNL::abs( index - ind ); - if( ! iterator || - iter_dist > ind || - iter_dist > size - ind ) - { - if( ind < size - ind ) - { - //cout << "Setting curent index to 0." << std::endl; - index = 0; - iterator = first; - } - else - { - //cout << "Setting curent index to size - 1." 
<< std::endl; - index = size - 1; - iterator = last; - } - } - while( index != ind ) - { - //cout << " current index = " << index - // << " index = " << ind << std::endl; - if( ind < index ) - { - iterator = iterator -> Previous(); - index --; - } - else - { - iterator = iterator -> Next(); - index ++; - } - TNL_ASSERT( iterator, ); - } - return iterator -> Data(); -}; - -template< typename T > -const T& List< T >::operator[]( const int& ind ) const -{ - return const_cast< List< T >* >( this ) -> operator[]( ind ); -} - -template< typename T > -const List< T >& List< T >::operator = ( const List& lst ) -{ - AppendList( lst ); - return( *this ); -} - -template< typename T > -bool List< T >::operator == ( const List& lst ) const -{ - if( this->getSize() != lst.getSize() ) - return false; - for( int i = 0; i < this->getSize(); i++ ) - if( (*this)[ i ] != lst[ i ] ) - return false; - return true; -} - -template< typename T > -bool List< T >::operator != ( const List& lst ) const -{ - return ! operator==( lst ); -} - -template< typename T > -bool List< T >::Append( const T& data ) -{ - if( ! first ) - { - TNL_ASSERT( ! last, ); - first = last = new ListDataElement< T >( data ); - } - else - { - ListDataElement< T >* new_element = new ListDataElement< T >( data, last, 0 ); - TNL_ASSERT( last, ); - last = last -> Next() = new_element; - } - size ++; - return true; -}; - -template< typename T > -bool List< T >::Prepend( const T& data ) -{ - if( ! first ) - { - TNL_ASSERT( ! last, ); - first = last = new ListDataElement< T >( data ); - } - else - { - ListDataElement< T >* new_element = new ListDataElement< T >( data, 0, first ); - first = first -> Previous() = new_element; - } - size ++; - index ++; - return true; -}; - -template< typename T > -bool List< T >::Insert( const T& data, const int& ind ) -{ - TNL_ASSERT( ind <= size || ! 
size, ); - if( ind == 0 ) return Prepend( data ); - if( ind == size ) return Append( data ); - operator[]( ind ); - ListDataElement< T >* new_el = - new ListDataElement< T >( data, - iterator -> Previous(), - iterator ); - iterator -> Previous() -> Next() = new_el; - iterator -> Previous() = new_el; - iterator = new_el; - size ++; - return true; -}; - -template< typename T > -bool List< T >::AppendList( const List< T >& lst ) -{ - int i; - for( i = 0; i < lst. getSize(); i ++ ) - { - if( ! Append( lst[ i ] ) ) return false; - } - return true; -}; - -template< typename T > -bool List< T >::PrependList( const List< T >& lst ) - -{ - int i; - for( i = lst. getSize(); i > 0; i -- ) - if( ! Prepend( lst[ i - 1 ] ) ) return false; - return true; -}; - -template< typename T > - template< typename Array > -void List< T >::toArray( Array& array ) -{ - array.setSize( this->getSize() ); - for( int i = 0; i < this->getSize(); i++ ) - array[ i ] = ( *this )[ i ]; -} -template< typename T > -bool List< T >::containsValue( const T& v ) const -{ - for( int i = 0; i < this->getSize(); i++ ) - if( ( *this )[ i ] == v ) - return true; - return false; -} - -template< typename T > -void List< T >::Erase( const int& ind ) -{ - operator[]( ind ); - ListDataElement< T >* tmp_it = iterator; - if( iterator -> Next() ) - iterator -> Next() -> Previous() = iterator -> Previous(); - if( iterator -> Previous() ) - iterator -> Previous() -> Next() = iterator -> Next(); - if( iterator -> Next() ) iterator = iterator -> Next(); - else - { - iterator = iterator -> Previous(); - index --; - } - if( first == tmp_it ) first = iterator; - if( last == tmp_it ) last = iterator; - delete tmp_it; - size --; -}; - -template< typename T > -void List< T >::DeepErase( const int& ind ) -{ - operator[]( ind ); - delete iterator -> Data(); - Erase( ind ); -}; - -template< typename T > -void List< T >::reset() -{ - iterator = first; - ListDataElement< T >* tmp_it; - while( iterator ) - { - TNL_ASSERT( iterator, ); 
- tmp_it = iterator; - iterator = iterator -> Next(); - delete tmp_it; - } - first = last = 0; - size = 0; -}; - -template< typename T > -void List< T >::DeepEraseAll() -{ - iterator = first; - ListDataElement< T >* tmp_it; - int i( 0 ); - while( iterator ) - { - tmp_it = iterator; - iterator = iterator -> Next(); - delete tmp_it -> Data(); - delete tmp_it; - i++; - } - first = last = 0; - size = 0; -}; - -template< typename T > -bool List< T >::Save( File& file ) const -{ - file.save( &size ); - for( int i = 0; i < size; i ++ ) - if( ! file. save( &operator[]( i ), 1 ) ) - return false; - return true; -} - -template< typename T > -bool List< T >::DeepSave( File& file ) const -{ - file.save( &size ); - for( int i = 0; i < size; i ++ ) - if( ! operator[]( i ). save( file ) ) return false; - return true; -} - -template< typename T > -bool List< T >::Load( File& file ) -{ - reset(); - int _size; - file.load( &_size, 1 ); - if( _size < 0 ) - { - std::cerr << "The curve size is negative." << std::endl; - return false; - } - T t; - for( int i = 0; i < _size; i ++ ) - { - if( ! file.load( &t, 1 ) ) - return false; - Append( t ); - } - return true; -}; - -template< typename T > -bool List< T >::DeepLoad( File& file ) -{ - reset(); - int _size; - file.load( &_size ); - if( _size < 0 ) - { - std::cerr << "The list size is negative." << std::endl; - return false; - } - for( int i = 0; i < _size; i ++ ) - { - T t; - if( ! t. load( file ) ) return false; - Append( t ); - } - return true; -}; - -template< typename T > -std::ostream& operator << ( std::ostream& str, const List< T >& list ) -{ - int i, size( list. 
getSize() ); - for( i = 0; i < size; i ++ ) - str << "Item " << i << ":" << list[ i ] << std::endl; - return str; -}; - -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Images/DicomSeries.h b/src/TNL/Images/DicomSeries.h index 50355bc34..b5aa77a57 100644 --- a/src/TNL/Images/DicomSeries.h +++ b/src/TNL/Images/DicomSeries.h @@ -14,8 +14,9 @@ #pragma once +#include + #include -#include #include #include #include @@ -102,7 +103,7 @@ class DicomSeries : public Image< int > bool loadImage( const String& filePath, int number ); - Containers::List< String > fileList; + std::list< String > fileList; Containers::Array dicomSeriesHeaders; diff --git a/src/TNL/Images/DicomSeries_impl.h b/src/TNL/Images/DicomSeries_impl.h index 350bf384b..533808b0d 100644 --- a/src/TNL/Images/DicomSeries_impl.h +++ b/src/TNL/Images/DicomSeries_impl.h @@ -155,22 +155,22 @@ inline bool DicomSeries::retrieveFileList( const String& filePath) String fileNamePrefix(fileName.getString(), 0, fileName.getLength() - separatorPosition); struct dirent **dirp; - Containers::List files; + std::list< String > files; //scan and sort directory int ndirs = scandir(directoryPath.getString(), &dirp, filter, alphasort); for(int i = 0 ; i < ndirs; ++i) { - files.Append( String((char *)dirp[i]->d_name)); + files.push_back( String((char *)dirp[i]->d_name) ); delete dirp[i]; } - for (int i = 0; i < files.getSize(); i++) + for (auto& file : files) { //check if file prefix contained - if (strstr(files[ i ].getString(), fileNamePrefix.getString())) + if (strstr(file.getString(), fileNamePrefix.getString())) { - fileList.Append( directoryPath + files[ i ] ); + fileList.push_back( directoryPath + file ); } } } @@ -182,7 +182,7 @@ inline bool DicomSeries::loadImage( const String& filePath, int number) #ifdef HAVE_DCMTK_H //load header DicomHeader *header = new DicomHeader(); - dicomSeriesHeaders.setSize( fileList.getSize() ); + dicomSeriesHeaders.setSize( fileList.size() ); dicomSeriesHeaders.setElement( 
number, header ); if( !header->loadFromFile( filePath ) ) return false; @@ -283,7 +283,7 @@ inline bool DicomSeries::loadImage( const String& filePath, int number) imagesInfo.frameSize = size; if (pixelData) delete pixelData; - pixelData = new Uint16[imagesInfo.frameUintsCount * fileList.getSize()]; + pixelData = new Uint16[imagesInfo.frameUintsCount * fileList.size()]; } else {//check image size for compatibility @@ -328,13 +328,14 @@ inline bool DicomSeries::loadDicomSeries( const String& filePath ) } //load images - int imagesCountToLoad = fileList.getSize(); - for( int i=0; i < imagesCountToLoad; i++ ) + int counter = 0; + for( auto& file : fileList ) { - if( !loadImage( fileList[ i ].getString(),i ) ) + if( !loadImage( file.getString(), counter ) ) { - std::cerr << fileList[ i ] << " skipped"; + std::cerr << file << " skipped"; } + counter++; } return true; } diff --git a/src/Tools/tnl-dicom-reader.cpp b/src/Tools/tnl-dicom-reader.cpp index f6931e5f4..c0f770e49 100644 --- a/src/Tools/tnl-dicom-reader.cpp +++ b/src/Tools/tnl-dicom-reader.cpp @@ -37,7 +37,7 @@ bool processDicomFiles( const Config::ParameterContainer& parameters ) bool processDicomSeries( const Config::ParameterContainer& parameters ) { - const Containers::List< String >& dicomSeriesNames = parameters.getParameter< Containers::List< String > >( "dicom-series" ); + const std::vector< String >& dicomSeriesNames = parameters.getParameter< std::vector< String > >( "dicom-series" ); String meshFile = parameters.getParameter< String >( "mesh-file" ); bool verbose = parameters.getParameter< bool >( "verbose" ); @@ -45,7 +45,7 @@ bool processDicomSeries( const Config::ParameterContainer& parameters ) GridType grid; Containers::Vector< double, Devices::Host, int > vector; Images::RegionOfInterest< int > roi; - for( int i = 0; i < dicomSeriesNames.getSize(); i++ ) + for( std::size_t i = 0; i < dicomSeriesNames.size(); i++ ) { const String& seriesName = dicomSeriesNames[ i ]; std::cout << "Reading a file " 
<< seriesName << std::endl; diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index 51060d770..6ff7570dd 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -1,7 +1,3 @@ -ADD_EXECUTABLE( ListTest ListTest.cpp ) -TARGET_COMPILE_OPTIONS( ListTest PRIVATE ${CXX_TESTS_FLAGS} ) -TARGET_LINK_LIBRARIES( ListTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( ArrayTest ArrayTest.cpp ) TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} ) @@ -73,7 +69,6 @@ TARGET_COMPILE_OPTIONS( StaticVectorOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} ) -ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/ListTest.cpp b/src/UnitTests/Containers/ListTest.cpp deleted file mode 100644 index 072b75003..000000000 --- a/src/UnitTests/Containers/ListTest.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/*************************************************************************** - ListTest.cpp - description - ------------------- - begin : Feb 15, 2014 - copyright : (C) 2014 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifdef HAVE_GTEST -#include - -#include - -using namespace TNL; -using namespace TNL::Containers; - -// minimal custom data structure usable as ValueType in List -struct MyData -{ - double data; - - __cuda_callable__ - MyData() : data(0) {} - - template< typename T > - __cuda_callable__ - MyData( T v ) : data(v) {} - - __cuda_callable__ - bool operator==( const MyData& v ) const { return data == v.data; } - - __cuda_callable__ - bool operator!=( const MyData& v ) const { return data != v.data; } -}; - -std::ostream& operator<<( std::ostream& str, const MyData& v ) -{ - return str << v.data; -} - - -// test fixture for typed tests -template< typename List > -class ListTest : public ::testing::Test -{ -protected: - using ListType = List; -}; - -// types for which ListTest is instantiated -using ListTypes = ::testing::Types< - List< short >, - List< int >, - List< long >, - List< float >, - List< double >, - List< MyData > ->; - -TYPED_TEST_SUITE( ListTest, ListTypes ); - - -TYPED_TEST( ListTest, constructor ) -{ - using ListType = typename TestFixture::ListType; - using ValueType = typename ListType::ValueType; - - ListType list; - EXPECT_TRUE( list.isEmpty() ); - EXPECT_EQ( list.getSize(), 0 ); - - list.Append( ( ValueType ) 0 ); - EXPECT_EQ( list.getSize(), 1 ); - - ListType copy( list ); - list.Append( ( ValueType ) 0 ); - EXPECT_EQ( list.getSize(), 2 ); - EXPECT_EQ( copy.getSize(), 1 ); - EXPECT_EQ( copy[ 0 ], list[ 0 ] ); -} - -TYPED_TEST( ListTest, operations ) -{ - using ListType = typename TestFixture::ListType; - using ValueType = typename ListType::ValueType; - - ListType a, b; - - a.Append( (ValueType) 0 ); - a.Append( (ValueType) 1 ); - a.Prepend( (ValueType) 2 ); - a.Insert( (ValueType) 3, 1 ); - EXPECT_EQ( a.getSize(), 4 ); - EXPECT_EQ( a[ 0 ], (ValueType) 2 ); - EXPECT_EQ( a[ 1 ], 
(ValueType) 3 ); - EXPECT_EQ( a[ 2 ], (ValueType) 0 ); - EXPECT_EQ( a[ 3 ], (ValueType) 1 ); - - b = a; - EXPECT_EQ( b.getSize(), 4 ); - EXPECT_EQ( a, b ); - - b.Insert( ( ValueType ) 4, 4 ); - EXPECT_NE( a, b ); - EXPECT_EQ( b[ 4 ], (ValueType) 4 ); - - a.AppendList( b ); - EXPECT_EQ( a.getSize(), 9 ); - EXPECT_EQ( a[ 0 ], (ValueType) 2 ); - EXPECT_EQ( a[ 1 ], (ValueType) 3 ); - EXPECT_EQ( a[ 2 ], (ValueType) 0 ); - EXPECT_EQ( a[ 3 ], (ValueType) 1 ); - EXPECT_EQ( a[ 4 ], (ValueType) 2 ); - EXPECT_EQ( a[ 5 ], (ValueType) 3 ); - EXPECT_EQ( a[ 6 ], (ValueType) 0 ); - EXPECT_EQ( a[ 7 ], (ValueType) 1 ); - EXPECT_EQ( a[ 8 ], (ValueType) 4 ); - - a.PrependList( b ); - EXPECT_EQ( a.getSize(), 14 ); - EXPECT_EQ( a[ 0 ], (ValueType) 2 ); - EXPECT_EQ( a[ 1 ], (ValueType) 3 ); - EXPECT_EQ( a[ 2 ], (ValueType) 0 ); - EXPECT_EQ( a[ 3 ], (ValueType) 1 ); - EXPECT_EQ( a[ 4 ], (ValueType) 4 ); - EXPECT_EQ( a[ 5 ], (ValueType) 2 ); - EXPECT_EQ( a[ 6 ], (ValueType) 3 ); - EXPECT_EQ( a[ 7 ], (ValueType) 0 ); - EXPECT_EQ( a[ 8 ], (ValueType) 1 ); - EXPECT_EQ( a[ 9 ], (ValueType) 2 ); - EXPECT_EQ( a[ 10 ], (ValueType) 3 ); - EXPECT_EQ( a[ 11 ], (ValueType) 0 ); - EXPECT_EQ( a[ 12 ], (ValueType) 1 ); - EXPECT_EQ( a[ 13 ], (ValueType) 4 ); -} -#endif - - -#include "../main.h" -- GitLab From 3a99723314087917821cc5460a6df0ac1f770168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 11 Oct 2019 21:16:59 +0200 Subject: [PATCH 25/35] Removed useless typedefs such as ThisType --- src/Benchmarks/HeatEquation/TestGridEntity.h | 3 +-- src/Benchmarks/HeatEquation/Tuning/SimpleCell.h | 3 +-- src/Benchmarks/HeatEquation/tnlTestGrid2D.h | 6 ------ src/Benchmarks/HeatEquation/tnlTestGridEntity.h | 2 -- .../HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h | 5 ++--- src/Examples/flow-sw/DensityBoundaryConditionBoiler.h | 3 --- src/Examples/flow-sw/DensityBoundaryConditionCavity.h | 3 --- src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h | 3 --- 
src/Examples/flow-sw/EnergyBoundaryConditionCavity.h | 3 --- src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h | 3 --- src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h | 3 --- src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h | 3 --- src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h | 3 --- src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h | 3 --- src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h | 3 --- src/Examples/flow-vl/DensityBoundaryConditionBoiler.h | 3 --- src/Examples/flow-vl/DensityBoundaryConditionCavity.h | 3 --- src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h | 3 --- src/Examples/flow-vl/EnergyBoundaryConditionCavity.h | 3 --- src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h | 3 --- src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h | 3 --- src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h | 3 --- src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h | 3 --- src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h | 3 --- src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h | 3 --- src/Examples/flow/DensityBoundaryConditionBoiler.h | 3 --- src/Examples/flow/DensityBoundaryConditionCavity.h | 3 --- src/Examples/flow/EnergyBoundaryConditionBoiler.h | 3 --- src/Examples/flow/EnergyBoundaryConditionCavity.h | 3 --- src/Examples/flow/MomentumXBoundaryConditionBoiler.h | 3 --- src/Examples/flow/MomentumXBoundaryConditionCavity.h | 3 --- src/Examples/flow/MomentumYBoundaryConditionBoiler.h | 3 --- src/Examples/flow/MomentumYBoundaryConditionCavity.h | 3 --- src/Examples/flow/MomentumZBoundaryConditionBoiler.h | 3 --- src/Examples/flow/MomentumZBoundaryConditionCavity.h | 3 --- 35 files changed, 4 insertions(+), 105 deletions(-) diff --git a/src/Benchmarks/HeatEquation/TestGridEntity.h b/src/Benchmarks/HeatEquation/TestGridEntity.h index 3492b2198..5be39bac1 100644 --- a/src/Benchmarks/HeatEquation/TestGridEntity.h +++ b/src/Benchmarks/HeatEquation/TestGridEntity.h @@ -78,8 +78,7 @@ class 
TestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension typedef Containers::StaticVector< meshDimension, IndexType > EntityOrientationType; typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType; - typedef TestGridEntity< GridType, entityDimension > ThisType; - typedef TestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType; + typedef TestNeighborGridEntitiesStorage< TestGridEntity > NeighborGridEntitiesStorageType; __cuda_callable__ inline TestGridEntity( const GridType& grid ) diff --git a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h index 67254ab36..59de340f2 100644 --- a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h +++ b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h @@ -26,8 +26,7 @@ class SimpleCell typedef typename GridType::IndexType IndexType; typedef typename GridType::CoordinatesType CoordinatesType; typedef typename GridType::PointType PointType; - typedef SimpleCell< GridType, Config > ThisType; - typedef Meshes::NeighborGridEntitiesStorage< ThisType, Config > + typedef Meshes::NeighborGridEntitiesStorage< SimpleCell, Config > NeighborGridEntitiesStorageType; typedef Config ConfigType; diff --git a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h index a17e29c12..a7a6fe39e 100644 --- a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h +++ b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h @@ -52,9 +52,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject typedef Index IndexType; typedef Containers::StaticVector< 2, Real > PointType; typedef Containers::StaticVector< 2, Index > CoordinatesType; - typedef Meshes::Grid< 2, Real, Devices::Host, Index > HostType; - typedef Meshes::Grid< 2, Real, tnlCuda, Index > CudaType; - typedef Meshes::Grid< 2, Real, Device, Index > ThisType; static const int meshDimension = 2; @@ -808,9 +805,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject 
typedef Index IndexType; typedef Containers::StaticVector< 2, Real > PointType; typedef Containers::StaticVector< 2, Index > CoordinatesType; - typedef Meshes::Grid< 2, Real, Devices::Host, Index > HostType; - typedef Meshes::Grid< 2, Real, tnlCuda, Index > CudaType; - typedef Meshes::Grid< 2, Real, Device, Index > ThisType; static const int meshDimension = 2; diff --git a/src/Benchmarks/HeatEquation/tnlTestGridEntity.h b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h index aa8bd8d05..4401e1e72 100644 --- a/src/Benchmarks/HeatEquation/tnlTestGridEntity.h +++ b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h @@ -55,8 +55,6 @@ class tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimensi typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityOrientationType; typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityBasisType; - typedef tnlTestGridEntity< GridType, entityDimension, Config > ThisType; - //typedef tnlTestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType; /*template< int NeighborEntityDimension = entityDimension > using NeighborEntities = diff --git a/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h index 13c7848de..a6434a013 100644 --- a/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h +++ b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h @@ -93,7 +93,6 @@ class tnlTestNeighborGridEntityGetter< typedef typename GridType::CoordinatesType CoordinatesType; typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetter; typedef GridEntityStencilStorageTag< GridEntityCrossStencil > StencilStorage; - typedef tnlTestNeighborGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType; static const int stencilSize = Config::getStencilSize(); @@ -110,7 +109,7 @@ class tnlTestNeighborGridEntityGetter< public: __cuda_callable__ - static void 
exec( ThisType& neighborEntityGetter, const IndexType& entityIndex ) + static void exec( tnlTestNeighborGridEntityGetter& neighborEntityGetter, const IndexType& entityIndex ) { neighborEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index; } @@ -122,7 +121,7 @@ class tnlTestNeighborGridEntityGetter< public: __cuda_callable__ - static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex ) + static void exec( tnlTestNeighborGridEntityGetter& neighborEntityGetter, const IndexType& entityIndex ) { neighborEntityGetter.stencilY[ index + stencilSize ] = entityIndex + index * neighborEntityGetter.entity.getMesh().getDimensions().x(); diff --git a/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h index 6231f6780..e02f1b1e4 100644 --- a/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef 
DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/DensityBoundaryConditionCavity.h b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h index 18eaff110..008a68bef 100644 --- a/src/Examples/flow-sw/DensityBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, 
DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h index a99fdf015..0090bc245 100644 --- a/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef 
Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h index 3b49cd56e..0730c9ee2 100644 --- a/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef 
EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h index dfe63e076..6a921539c 100644 --- a/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class 
MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git 
a/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h index 07abfdbeb..f27dda7f4 100644 --- a/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef 
MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h index 83b6282dd..2a8e06f2f 100644 --- a/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef 
Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h index a83dd653f..35c01409c 100644 --- a/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< 
MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h index 9d887857c..cf790d77d 100644 --- a/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > 
PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h index 5fe6f22e5..a771ab84f 100644 --- a/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType 
> CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h index 6231f6780..e02f1b1e4 100644 --- a/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, 
Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/DensityBoundaryConditionCavity.h b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h index 18eaff110..008a68bef 100644 --- a/src/Examples/flow-vl/DensityBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn 
typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h index 
a99fdf015..0090bc245 100644 --- a/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; 
typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h index 3b49cd56e..0730c9ee2 100644 --- a/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - 
typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h index dfe63e076..6a921539c 100644 --- a/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -418,7 +416,6 @@ class 
MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h index 07abfdbeb..f27dda7f4 100644 --- a/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef 
MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h index 83b6282dd..2a8e06f2f 100644 --- a/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef 
Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h index a83dd653f..35c01409c 100644 --- a/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< 
MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h index 9d887857c..cf790d77d 100644 --- a/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > 
PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h index 5fe6f22e5..a771ab84f 100644 --- a/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h +++ 
b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > 
CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/DensityBoundaryConditionBoiler.h b/src/Examples/flow/DensityBoundaryConditionBoiler.h index 6231f6780..e02f1b1e4 100644 --- a/src/Examples/flow/DensityBoundaryConditionBoiler.h +++ b/src/Examples/flow/DensityBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > 
ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/DensityBoundaryConditionCavity.h b/src/Examples/flow/DensityBoundaryConditionCavity.h index c753d324a..7611f682c 100644 --- a/src/Examples/flow/DensityBoundaryConditionCavity.h +++ b/src/Examples/flow/DensityBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< 
RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/EnergyBoundaryConditionBoiler.h b/src/Examples/flow/EnergyBoundaryConditionBoiler.h index a99fdf015..0090bc245 100644 --- a/src/Examples/flow/EnergyBoundaryConditionBoiler.h +++ b/src/Examples/flow/EnergyBoundaryConditionBoiler.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef 
Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/EnergyBoundaryConditionCavity.h b/src/Examples/flow/EnergyBoundaryConditionCavity.h index 60e55f424..0ba8c80aa 100644 --- a/src/Examples/flow/EnergyBoundaryConditionCavity.h +++ b/src/Examples/flow/EnergyBoundaryConditionCavity.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef 
EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h index dfe63e076..6a921539c 100644 --- a/src/Examples/flow/MomentumXBoundaryConditionBoiler.h +++ b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< 
Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumXBoundaryConditionCavity.h b/src/Examples/flow/MomentumXBoundaryConditionCavity.h index 07abfdbeb..f27dda7f4 100644 --- a/src/Examples/flow/MomentumXBoundaryConditionCavity.h +++ b/src/Examples/flow/MomentumXBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef 
CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h index 83b6282dd..2a8e06f2f 100644 --- a/src/Examples/flow/MomentumYBoundaryConditionBoiler.h +++ b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef 
Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumYBoundaryConditionCavity.h b/src/Examples/flow/MomentumYBoundaryConditionCavity.h index a83dd653f..35c01409c 100644 --- 
a/src/Examples/flow/MomentumYBoundaryConditionCavity.h +++ b/src/Examples/flow/MomentumYBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef 
Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h index 9d887857c..cf790d77d 100644 --- a/src/Examples/flow/MomentumZBoundaryConditionBoiler.h +++ b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - 
typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumZBoundaryConditionCavity.h b/src/Examples/flow/MomentumZBoundaryConditionCavity.h index 5fe6f22e5..a771ab84f 100644 --- a/src/Examples/flow/MomentumZBoundaryConditionCavity.h +++ b/src/Examples/flow/MomentumZBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class 
MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; -- GitLab From d070cc39053945afde7e4623e602ebaacd3dad8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 11 Oct 2019 23:27:11 +0200 Subject: [PATCH 26/35] Removed HostType and CudaType aliases in containers, matrices and grids They are not suitable for more than 2 devices/execution types; their design breaks the Open-Closed Principle. Instead, a type template "Self" was created, which allows to change any template parameter. 
--- .../DistSpMV/tnl-benchmark-distributed-spmv.h | 8 ++--- .../tnl-benchmark-linear-solvers.h | 10 +++--- .../NDArray/tnl-benchmark-ndarray-boundary.h | 3 +- .../NDArray/tnl-benchmark-ndarray.h | 3 +- src/TNL/Containers/Array.h | 20 ++++++------ src/TNL/Containers/ArrayView.h | 19 ++++++----- src/TNL/Containers/DistributedArray.h | 12 +++++-- src/TNL/Containers/DistributedArrayView.h | 12 +++++-- src/TNL/Containers/DistributedVector.h | 12 +++++-- src/TNL/Containers/DistributedVectorView.h | 12 +++++-- src/TNL/Containers/Vector.h | 21 ++++++------ src/TNL/Containers/VectorView.h | 20 ++++++------ src/TNL/Matrices/AdEllpack.h | 7 ++-- src/TNL/Matrices/BiEllpack.h | 7 ++-- src/TNL/Matrices/BiEllpackSymmetric.h | 7 ++-- src/TNL/Matrices/COOMatrix.h | 7 ++-- src/TNL/Matrices/CSR.h | 7 ++-- src/TNL/Matrices/ChunkedEllpack.h | 7 ++-- src/TNL/Matrices/Dense.h | 6 ++-- src/TNL/Matrices/DistributedMatrix.h | 9 ++++-- src/TNL/Matrices/Ellpack.h | 7 ++-- src/TNL/Matrices/EllpackSymmetric.h | 6 ++-- src/TNL/Matrices/EllpackSymmetricGraph.h | 6 ++-- src/TNL/Matrices/Ellpack_impl.h | 4 +-- src/TNL/Matrices/MatrixReader_impl.h | 6 ++-- src/TNL/Matrices/Multidiagonal.h | 6 ++-- src/TNL/Matrices/SlicedEllpack.h | 7 ++-- src/TNL/Matrices/SlicedEllpackSymmetric.h | 7 ++-- .../Matrices/SlicedEllpackSymmetricGraph.h | 7 ++-- src/TNL/Matrices/SlicedEllpack_impl.h | 22 ++++++------- src/TNL/Matrices/SparseOperations_impl.h | 6 ++-- src/TNL/Matrices/Tridiagonal.h | 7 ++-- src/TNL/Meshes/GridDetails/Grid1D.h | 2 -- src/TNL/Meshes/GridDetails/Grid2D.h | 2 -- src/TNL/Meshes/GridDetails/Grid3D.h | 2 -- src/TNL/Meshes/Mesh.h | 4 --- .../MeshLayers/BoundaryTags/Layer.h | 4 +-- src/TNL/Solvers/Linear/GMRES.h | 4 +-- src/UnitTests/Algorithms/MultireductionTest.h | 4 +-- src/UnitTests/Containers/ArrayTest.h | 10 ++++-- src/UnitTests/Containers/ArrayViewTest.h | 21 ++++++++---- .../Containers/DistributedVectorTest.h | 3 +- .../Containers/VectorBinaryOperationsTest.h | 7 ++-- 
.../Containers/VectorHelperFunctions.h | 15 ++++++--- .../Containers/VectorPrefixSumTest.h | 6 ++-- .../Containers/VectorUnaryOperationsTest.h | 16 ++++++---- .../Matrices/DistributedMatrixTest.h | 32 +++++++++++-------- src/UnitTests/Matrices/SparseMatrixTest.hpp | 6 ---- 48 files changed, 262 insertions(+), 176 deletions(-) diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h index 81e5d3a6d..aa4b29424 100644 --- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h @@ -73,8 +73,8 @@ benchmarkSpmvCuda( Benchmark& benchmark, { using RealType = typename Matrix::RealType; using IndexType = typename Matrix::IndexType; - using CudaMatrix = typename Matrix::CudaType; - using CudaVector = typename Vector::CudaType; + using CudaMatrix = typename Matrix::template Self< RealType, Devices::Cuda >; + using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >; CudaVector cuda_x; cuda_x = x; @@ -125,8 +125,8 @@ benchmarkDistributedSpmvCuda( Benchmark& benchmark, { using RealType = typename Matrix::RealType; using IndexType = typename Matrix::IndexType; - using CudaMatrix = typename Matrix::CudaType; - using CudaVector = typename Vector::CudaType; + using CudaMatrix = typename Matrix::template Self< RealType, Devices::Cuda >; + using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >; CudaVector cuda_x; cuda_x = x; diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index ffb2f121a..0701b647a 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -119,8 +119,8 @@ benchmarkIterativeSolvers( Benchmark& benchmark, const Vector& b ) { #ifdef HAVE_CUDA - using CudaMatrix = typename Matrix::CudaType; - 
using CudaVector = typename Vector::CudaType; + using CudaMatrix = typename Matrix::template Self< typename Matrix::RealType, Devices::Cuda >; + using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >; CudaVector cuda_x0, cuda_b; cuda_x0 = x0; @@ -461,9 +461,11 @@ struct LinearSolversBenchmark SharedPointer< CSR > matrixCopy; Matrices::copySparseMatrix( *matrixCopy, *matrixPointer ); - SharedPointer< typename CSR::CudaType > cuda_matrixCopy; + using CudaCSR = Matrices::CSR< RealType, Devices::Cuda, IndexType >; + using CudaVector = typename VectorType::template Self< RealType, Devices::Cuda >; + SharedPointer< CudaCSR > cuda_matrixCopy; *cuda_matrixCopy = *matrixCopy; - typename VectorType::CudaType cuda_x0, cuda_b; + CudaVector cuda_x0, cuda_b; cuda_x0.setLike( x0 ); cuda_b.setLike( b ); cuda_x0 = x0; diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h index b1fdd8c71..285dd6f3d 100644 --- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h +++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h @@ -52,7 +52,8 @@ template< typename Array > void expect_eq( Array& a, Array& b ) { if( std::is_same< typename Array::DeviceType, TNL::Devices::Cuda >::value ) { - typename Array::HostType a_host, b_host; + using HostArray = typename Array::template Self< typename Array::ValueType, TNL::Devices::Host >; + HostArray a_host, b_host; a_host = a; b_host = b; expect_eq_chunked( a_host, b_host ); diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h index d8865a40a..0c29b21b5 100644 --- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h +++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h @@ -54,7 +54,8 @@ template< typename Array > void expect_eq( Array& a, Array& b ) { if( std::is_same< typename Array::DeviceType, TNL::Devices::Cuda >::value ) { - typename Array::HostType a_host, b_host; + using 
HostArray = typename Array::template Self< typename Array::ValueType, TNL::Devices::Host >; + HostArray a_host, b_host; a_host = a; b_host = b; expect_eq_chunked( a_host, b_host ); diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index 2d9848769..45ef1e272 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -73,7 +73,6 @@ template< typename Value, class Array { public: - /** * \brief Type of elements stored in this array. */ @@ -98,16 +97,6 @@ class Array */ using AllocatorType = Allocator; - /** - * \brief Defines the same array type but allocated on host (CPU). - */ - using HostType = Array< Value, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same array type but allocated on CUDA device (GPU). - */ - using CudaType = Array< Value, TNL::Devices::Cuda, Index >; - /** * \brief Compatible ArrayView type. */ @@ -118,6 +107,15 @@ class Array */ using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >; + /** + * \brief A template which allows to quickly obtain an \ref Array type with changed template parameters. + */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index, + typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Value > > + using Self = Array< _Value, _Device, _Index, _Allocator >; + /** * \brief Constructs an empty array with zero size. diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index 49a4a911d..d51f151f7 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -80,16 +80,6 @@ public: */ using IndexType = Index; - /** - * \brief Defines the same array type but allocated on host (CPU). - */ - using HostType = ArrayView< Value, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same array type but allocated on CUDA device (GPU). - */ - using CudaType = ArrayView< Value, TNL::Devices::Cuda, Index >; - /** * \brief Compatible ArrayView type. 
*/ @@ -100,6 +90,15 @@ public: */ using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >; + /** + * \brief A template which allows to quickly obtain an \ref ArrayView type with changed template parameters. + */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index > + using Self = ArrayView< _Value, _Device, _Index >; + + /** * \brief Constructs an empty array view. * diff --git a/src/TNL/Containers/DistributedArray.h b/src/TNL/Containers/DistributedArray.h index f7b0c383a..ce4e9ce5e 100644 --- a/src/TNL/Containers/DistributedArray.h +++ b/src/TNL/Containers/DistributedArray.h @@ -35,11 +35,19 @@ public: using LocalRangeType = Subrange< Index >; using LocalViewType = Containers::ArrayView< Value, Device, Index >; using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >; - using HostType = DistributedArray< Value, Devices::Host, Index, Communicator >; - using CudaType = DistributedArray< Value, Devices::Cuda, Index, Communicator >; using ViewType = DistributedArrayView< Value, Device, Index, Communicator >; using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref DistributedArray type with changed template parameters. 
+ */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedArray< _Value, _Device, _Index, _Communicator >; + + DistributedArray() = default; DistributedArray( DistributedArray& ) = default; diff --git a/src/TNL/Containers/DistributedArrayView.h b/src/TNL/Containers/DistributedArrayView.h index 41557d784..6022521bc 100644 --- a/src/TNL/Containers/DistributedArrayView.h +++ b/src/TNL/Containers/DistributedArrayView.h @@ -34,11 +34,19 @@ public: using LocalRangeType = Subrange< Index >; using LocalViewType = Containers::ArrayView< Value, Device, Index >; using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >; - using HostType = DistributedArrayView< Value, Devices::Host, Index, Communicator >; - using CudaType = DistributedArrayView< Value, Devices::Cuda, Index, Communicator >; using ViewType = DistributedArrayView< Value, Device, Index, Communicator >; using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref DistributedArrayView type with changed template parameters. 
+ */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedArrayView< _Value, _Device, _Index, _Communicator >; + + // Initialization by raw data __cuda_callable__ DistributedArrayView( const LocalRangeType& localRange, IndexType globalSize, CommunicationGroup group, LocalViewType localData ) diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h index 27ea91033..f1736b378 100644 --- a/src/TNL/Containers/DistributedVector.h +++ b/src/TNL/Containers/DistributedVector.h @@ -34,11 +34,19 @@ public: using IndexType = Index; using LocalViewType = Containers::VectorView< Real, Device, Index >; using ConstLocalViewType = Containers::VectorView< std::add_const_t< Real >, Device, Index >; - using HostType = DistributedVector< Real, Devices::Host, Index, Communicator >; - using CudaType = DistributedVector< Real, Devices::Cuda, Index, Communicator >; using ViewType = DistributedVectorView< Real, Device, Index, Communicator >; using ConstViewType = DistributedVectorView< std::add_const_t< Real >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref Vector type with changed template parameters. 
+ */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedVector< _Real, _Device, _Index, _Communicator >; + + // inherit all constructors and assignment operators from Array using BaseType::DistributedArray; using BaseType::operator=; diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h index cb23669ac..47ad78836 100644 --- a/src/TNL/Containers/DistributedVectorView.h +++ b/src/TNL/Containers/DistributedVectorView.h @@ -35,11 +35,19 @@ public: using IndexType = Index; using LocalViewType = Containers::VectorView< Real, Device, Index >; using ConstLocalViewType = Containers::VectorView< std::add_const_t< Real >, Device, Index >; - using HostType = DistributedVectorView< Real, Devices::Host, Index, Communicator >; - using CudaType = DistributedVectorView< Real, Devices::Cuda, Index, Communicator >; using ViewType = DistributedVectorView< Real, Device, Index, Communicator >; using ConstViewType = DistributedVectorView< std::add_const_t< Real >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref VectorView type with changed template parameters. + */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedVectorView< _Real, _Device, _Index, _Communicator >; + + // inherit all constructors and assignment operators from ArrayView using BaseType::DistributedArrayView; using BaseType::operator=; diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index ba67df733..c23154e94 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -42,7 +42,6 @@ class Vector : public Array< Real, Device, Index, Allocator > { public: - /** * \brief Type of elements stored in this vector. 
*/ @@ -67,16 +66,6 @@ public: */ using AllocatorType = Allocator; - /** - * \brief Defines the same vector type but allocated on host (CPU). - */ - using HostType = Vector< Real, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same vector type but allocated on CUDA device (GPU). - */ - using CudaType = Vector< Real, TNL::Devices::Cuda, Index >; - /** * \brief Compatible VectorView type. */ @@ -87,6 +76,16 @@ public: */ using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >; + /** + * \brief A template which allows to quickly obtain a \ref Vector type with changed template parameters. + */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index, + typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Real > > + using Self = Vector< _Real, _Device, _Index, _Allocator >; + + // constructors and assignment operators inherited from the class Array using Array< Real, Device, Index, Allocator >::Array; using Array< Real, Device, Index, Allocator >::operator=; diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index e99754d4b..8200b0d39 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -39,7 +39,6 @@ class VectorView using BaseType = ArrayView< Real, Device, Index >; using NonConstReal = typename std::remove_const< Real >::type; public: - /** * \brief Type of elements stored in this vector. */ @@ -57,16 +56,6 @@ public: */ using IndexType = Index; - /** - * \brief Defines the same vector type but allocated on host (CPU). - */ - using HostType = VectorView< Real, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same vector type but allocated on CUDA device (GPU). - */ - using CudaType = VectorView< Real, TNL::Devices::Cuda, Index >; - /** * \brief Compatible VectorView type. 
*/ @@ -77,6 +66,15 @@ public: */ using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >; + /** + * \brief A template which allows to quickly obtain a \ref VectorView type with changed template parameters. + */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index > + using Self = VectorView< _Real, _Device, _Index >; + + // constructors and assignment operators inherited from the class ArrayView using ArrayView< Real, Device, Index >::ArrayView; using ArrayView< Real, Device, Index >::operator=; diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h index 200b54293..a50a17232 100644 --- a/src/TNL/Matrices/AdEllpack.h +++ b/src/TNL/Matrices/AdEllpack.h @@ -84,8 +84,11 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef AdEllpack< Real, Devices::Host, Index > HostType; - typedef AdEllpack< Real, Devices::Cuda, Index > CudaType; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = AdEllpack< _Real, _Device, _Index >; AdEllpack(); diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/BiEllpack.h index f3dcc366e..cfc132ccd 100644 --- a/src/TNL/Matrices/BiEllpack.h +++ b/src/TNL/Matrices/BiEllpack.h @@ -39,8 +39,11 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef BiEllpack< Real, Devices::Host, Index > HostType; - typedef BiEllpack< Real, Devices::Cuda, Index > CudaType; + + template< typename _Real = Real, + typename 
_Device = Device, + typename _Index = Index > + using Self = BiEllpack< _Real, _Device, _Index >; BiEllpack(); diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/BiEllpackSymmetric.h index 22f39cbb2..8a845a083 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric.h +++ b/src/TNL/Matrices/BiEllpackSymmetric.h @@ -30,8 +30,11 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef BiEllpackSymmetric< Real, Devices::Host, Index > HostType; - typedef BiEllpackSymmetric< Real, Devices::Cuda, Index > CudaType; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = BiEllpackSymmetric< _Real, _Device, _Index >; BiEllpackSymmetric(); diff --git a/src/TNL/Matrices/COOMatrix.h b/src/TNL/Matrices/COOMatrix.h index 799fd60c7..c5ce76244 100644 --- a/src/TNL/Matrices/COOMatrix.h +++ b/src/TNL/Matrices/COOMatrix.h @@ -35,8 +35,11 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef COOMatrix< Real, Devices::Host, Index > HostType; - typedef COOMatrix< Real, Devices::Cuda, Index > CudaType; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = COOMatrix< _Real, _Device, _Index >; COOMatrix(); diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h index 15632f130..485176d1d 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/CSR.h @@ -49,12 +49,15 @@ public: using IndexType = Index; typedef typename Sparse< RealType, DeviceType, 
IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef CSR< Real, Devices::Host, Index > HostType; - typedef CSR< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; using MatrixRow = typename BaseType::MatrixRow; using ConstMatrixRow = typename BaseType::ConstMatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = CSR< _Real, _Device, _Index >; + enum SPMVCudaKernel { scalar, vector, hybrid }; CSR(); diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h index 94ab61a14..a66e1283a 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/ChunkedEllpack.h @@ -75,12 +75,15 @@ public: typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef ChunkedEllpack< Real, Devices::Host, Index > HostType; - typedef ChunkedEllpack< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = ChunkedEllpack< _Real, _Device, _Index >; + ChunkedEllpack(); static String getSerializationType(); diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index a827f632e..c46992723 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -41,11 +41,13 @@ public: typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector 
CompressedRowLengthsVector; typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Dense< Real, Devices::Host, Index > HostType; - typedef Dense< Real, Devices::Cuda, Index > CudaType; typedef Matrix< Real, Device, Index > BaseType; typedef DenseRow< Real, Index > MatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Dense< _Real, _Device, _Index >; Dense(); diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h index 839273efd..76b6ea8c1 100644 --- a/src/TNL/Matrices/DistributedMatrix.h +++ b/src/TNL/Matrices/DistributedMatrix.h @@ -54,14 +54,17 @@ public: using CommunicatorType = Communicator; using LocalRangeType = Containers::Subrange< typename Matrix::IndexType >; - using HostType = DistributedMatrix< typename Matrix::HostType, Communicator >; - using CudaType = DistributedMatrix< typename Matrix::CudaType, Communicator >; - using CompressedRowLengthsVector = Containers::DistributedVector< IndexType, DeviceType, IndexType, CommunicatorType >; using MatrixRow = Matrices::SparseRow< RealType, IndexType >; using ConstMatrixRow = Matrices::SparseRow< std::add_const_t< RealType >, std::add_const_t< IndexType > >; + template< typename _Real = RealType, + typename _Device = DeviceType, + typename _Index = IndexType, + typename _Communicator = Communicator > + using Self = DistributedMatrix< typename MatrixType::template Self< _Real, _Device, _Index >, _Communicator >; + DistributedMatrix() = default; DistributedMatrix( DistributedMatrix& ) = default; diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h index ee3941034..6536f5f6c 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Ellpack.h @@ -39,12 +39,15 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef 
typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef Ellpack< Real, Devices::Host, Index > HostType; - typedef Ellpack< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Ellpack< _Real, _Device, _Index >; + Ellpack(); static String getSerializationType(); diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/EllpackSymmetric.h index 495fcdd07..d92fc77ee 100644 --- a/src/TNL/Matrices/EllpackSymmetric.h +++ b/src/TNL/Matrices/EllpackSymmetric.h @@ -31,9 +31,11 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef EllpackSymmetric< Real, Devices::Host, Index > HostType; - typedef EllpackSymmetric< Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = EllpackSymmetric< _Real, _Device, _Index >; EllpackSymmetric(); diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/EllpackSymmetricGraph.h index c232ad085..03e329855 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph.h @@ -31,9 +31,11 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView 
ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef EllpackSymmetricGraph< Real, Devices::Host, Index > HostType; - typedef EllpackSymmetricGraph< Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = EllpackSymmetricGraph< _Real, _Device, _Index >; EllpackSymmetricGraph(); diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index 7651ea0d7..5ac812cf2 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -648,8 +648,8 @@ Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2 // host -> cuda if( std::is_same< Device, Devices::Cuda >::value ) { - typename ValuesVector::HostType tmpValues; - typename ColumnIndexesVector::HostType tmpColumnIndexes; + typename ValuesVector::template Self< typename ValuesVector::ValueType, Devices::Sequential > tmpValues; + typename ColumnIndexesVector::template Self< typename ColumnIndexesVector::ValueType, Devices::Sequential > tmpColumnIndexes; tmpValues.setLike( this->values ); tmpColumnIndexes.setLike( this->columnIndexes ); diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index eacf8911a..418e6f5b3 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -425,11 +425,11 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > bool verbose, bool symReader ) { - typedef typename Matrix::HostType HostMatrixType; - typedef typename HostMatrixType::CompressedRowLengthsVector CompressedRowLengthsVector; + using HostMatrixType = typename Matrix::template Self< typename Matrix::RealType, Devices::Sequential >; + using CompressedRowLengthsVector = typename HostMatrixType::CompressedRowLengthsVector; 
HostMatrixType hostMatrix; - typename Matrix::CompressedRowLengthsVector rowLengths; + CompressedRowLengthsVector rowLengths; return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); matrix = hostMatrix; diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 2da69f790..1ee6a25e9 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -38,11 +38,13 @@ public: typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Multidiagonal< Real, Devices::Host, Index > HostType; - typedef Multidiagonal< Real, Devices::Cuda, Index > CudaType; typedef Matrix< Real, Device, Index > BaseType; typedef MultidiagonalRow< Real, Index > MatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Multidiagonal< _Real, _Device, _Index >; Multidiagonal(); diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h index 548916e25..5051fc218 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ b/src/TNL/Matrices/SlicedEllpack.h @@ -68,12 +68,15 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef SlicedEllpack< Real, Devices::Host, Index, SliceSize > HostType; - typedef SlicedEllpack< Real, Devices::Cuda, Index, SliceSize > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; + template< typename _Real = 
Real, + typename _Device = Device, + typename _Index = Index, + int _SliceSize = SliceSize > + using Self = SlicedEllpack< _Real, _Device, _Index, _SliceSize >; SlicedEllpack(); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/SlicedEllpackSymmetric.h index 272ae510e..835eccf83 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric.h @@ -49,9 +49,12 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef SlicedEllpackSymmetric< Real, Devices::Host, Index > HostType; - typedef SlicedEllpackSymmetric< Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + int _SliceSize = SliceSize > + using Self = SlicedEllpackSymmetric< _Real, _Device, _Index, _SliceSize >; SlicedEllpackSymmetric(); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h index 36d02f9bd..5fed4082b 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h @@ -49,9 +49,12 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef SlicedEllpackSymmetricGraph< Real, Devices::Host, Index > HostType; - typedef SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index > 
CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + int _SliceSize = SliceSize > + using Self = SlicedEllpackSymmetricGraph< _Real, _Device, _Index, _SliceSize >; SlicedEllpackSymmetricGraph(); diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index d1caef3ec..aa76caf7b 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -620,19 +620,14 @@ template< typename Real, SlicedEllpack< Real, Device, Index, SliceSize >& SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, - "unknown device" ); - this->setLike( matrix ); this->slicePointers = matrix.slicePointers; this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths; // host -> cuda if( std::is_same< Device, Devices::Cuda >::value ) { - typename ValuesVector::HostType tmpValues; - typename ColumnIndexesVector::HostType tmpColumnIndexes; + typename ValuesVector::template Self< typename ValuesVector::ValueType, Devices::Sequential > tmpValues; + typename ColumnIndexesVector::template Self< typename ColumnIndexesVector::ValueType, Devices::Sequential > tmpColumnIndexes; tmpValues.setLike( matrix.values ); tmpColumnIndexes.setLike( matrix.columnIndexes ); @@ -654,7 +649,7 @@ SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< } // cuda -> host - if( std::is_same< Device, Devices::Host >::value ) { + else { ValuesVector tmpValues; ColumnIndexesVector tmpColumnIndexes; tmpValues.setLike( matrix.values ); @@ -724,7 +719,7 @@ template< typename Real, int SliceSize > void SlicedEllpack< Real, Device, Index, SliceSize >::print( 
std::ostream& str ) const { - if( std::is_same< Device, Devices::Host >::value ) { + if( ! std::is_same< Device, Devices::Cuda >::value ) { for( IndexType row = 0; row < this->getRows(); row++ ) { str <<"Row: " << row << " -> "; @@ -745,7 +740,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str ) } } else { - HostType hostMatrix; + Self< Real, Devices::Sequential > hostMatrix; hostMatrix = *this; hostMatrix.print( str ); } @@ -778,12 +773,13 @@ __device__ void SlicedEllpack< Real, Device, Index, SliceSize >::computeMaximalR } #endif -template<> -class SlicedEllpackDeviceDependentCode< Devices::Host > +// implementation for host types +template< typename Device_ > +class SlicedEllpackDeviceDependentCode { public: - typedef Devices::Host Device; + typedef Device_ Device; template< typename Real, typename Index, diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h index 8771d24dc..ff507c326 100644 --- a/src/TNL/Matrices/SparseOperations_impl.h +++ b/src/TNL/Matrices/SparseOperations_impl.h @@ -170,7 +170,8 @@ typename std::enable_if< ! std::is_same< typename Matrix1::DeviceType, typename std::is_same< typename Matrix2::DeviceType, Devices::Host >::value >::type copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) { - typename Matrix2::CudaType B_tmp; + using CudaMatrix2 = typename Matrix2::template Self< typename Matrix2::RealType, Devices::Cuda >; + CudaMatrix2 B_tmp; B_tmp = B; copySparseMatrix_impl( A, B_tmp ); } @@ -182,7 +183,8 @@ typename std::enable_if< ! 
std::is_same< typename Matrix1::DeviceType, typename std::is_same< typename Matrix2::DeviceType, Devices::Cuda >::value >::type copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) { - typename Matrix1::CudaType A_tmp; + using CudaMatrix1 = typename Matrix1::template Self< typename Matrix1::RealType, Devices::Cuda >; + CudaMatrix1 A_tmp; copySparseMatrix_impl( A_tmp, B ); A = A_tmp; } diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 4e01c90bb..3f57fe1c3 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -40,11 +40,14 @@ public: typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Tridiagonal< Real, Devices::Host, Index > HostType; - typedef Tridiagonal< Real, Devices::Cuda, Index > CudaType; typedef Matrix< Real, Device, Index > BaseType; typedef TridiagonalRow< Real, Index > MatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Tridiagonal< _Real, _Device, _Index >; + Tridiagonal(); static String getSerializationType(); diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h index 3e36fd025..81811fe90 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D.h +++ b/src/TNL/Meshes/GridDetails/Grid1D.h @@ -34,8 +34,6 @@ class Grid< 1, Real, Device, Index > : public Object typedef Index GlobalIndexType; typedef Containers::StaticVector< 1, Real > PointType; typedef Containers::StaticVector< 1, Index > CoordinatesType; - typedef Grid< 1, Real, Devices::Host, Index > HostType; - typedef Grid< 1, Real, Devices::Cuda, Index > CudaType; typedef DistributedMeshes::DistributedMesh DistributedMeshType; diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h index ecaed0cd4..b24be9ba2 100644 
--- a/src/TNL/Meshes/GridDetails/Grid2D.h +++ b/src/TNL/Meshes/GridDetails/Grid2D.h @@ -34,8 +34,6 @@ class Grid< 2, Real, Device, Index > : public Object typedef Index GlobalIndexType; typedef Containers::StaticVector< 2, Real > PointType; typedef Containers::StaticVector< 2, Index > CoordinatesType; - typedef Grid< 2, Real, Devices::Host, Index > HostType; - typedef Grid< 2, Real, Devices::Cuda, Index > CudaType; typedef DistributedMeshes::DistributedMesh DistributedMeshType; diff --git a/src/TNL/Meshes/GridDetails/Grid3D.h b/src/TNL/Meshes/GridDetails/Grid3D.h index 873cc6872..881fb0074 100644 --- a/src/TNL/Meshes/GridDetails/Grid3D.h +++ b/src/TNL/Meshes/GridDetails/Grid3D.h @@ -33,8 +33,6 @@ class Grid< 3, Real, Device, Index > : public Object typedef Index GlobalIndexType; typedef Containers::StaticVector< 3, Real > PointType; typedef Containers::StaticVector< 3, Index > CoordinatesType; - typedef Grid< 3, Real, Devices::Host, Index > HostType; - typedef Grid< 3, Real, Devices::Cuda, Index > CudaType; typedef DistributedMeshes::DistributedMesh DistributedMeshType; diff --git a/src/TNL/Meshes/Mesh.h b/src/TNL/Meshes/Mesh.h index 27d70814c..4d71e3ac9 100644 --- a/src/TNL/Meshes/Mesh.h +++ b/src/TNL/Meshes/Mesh.h @@ -79,10 +79,6 @@ class Mesh using RealType = typename PointType::RealType; using GlobalIndexVector = Containers::Vector< GlobalIndexType, DeviceType, GlobalIndexType >; - // shortcuts, compatibility with grids - using HostType = Mesh< MeshConfig, Devices::Host >; - using CudaType = Mesh< MeshConfig, Devices::Cuda >; - template< int Dimension > using EntityTraits = typename MeshTraitsType::template EntityTraits< Dimension >; diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h index 873475bca..e31c76dae 100644 --- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h +++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h @@ -116,8 +116,8 @@ public: } // 
TODO: parallelize directly on the device else { - using BoundaryTagsHostArray = typename BoundaryTagsArray::HostType; - using OrderingHostArray = typename OrderingArray::HostType; + using BoundaryTagsHostArray = typename BoundaryTagsArray::template Self< typename BoundaryTagsArray::ValueType, Devices::Host >; + using OrderingHostArray = typename OrderingArray::template Self< typename OrderingArray::ValueType, Devices::Host >; BoundaryTagsHostArray hostBoundaryTags; OrderingHostArray hostBoundaryIndices; diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h index e1eb5e1cb..f1a4b8732 100644 --- a/src/TNL/Solvers/Linear/GMRES.h +++ b/src/TNL/Solvers/Linear/GMRES.h @@ -49,9 +49,9 @@ protected: // local vectors/views using ConstDeviceView = typename Traits::ConstLocalViewType; using DeviceView = typename Traits::LocalViewType; - using HostView = typename DeviceView::HostType; using DeviceVector = typename Traits::LocalVectorType; - using HostVector = typename DeviceVector::HostType; + using HostView = typename DeviceView::template Self< RealType, Devices::Host >; + using HostVector = typename DeviceVector::template Self< RealType, Devices::Host >;; enum class Variant { MGS, MGSR, CWY }; diff --git a/src/UnitTests/Algorithms/MultireductionTest.h b/src/UnitTests/Algorithms/MultireductionTest.h index 5e11efd16..ec674d935 100644 --- a/src/UnitTests/Algorithms/MultireductionTest.h +++ b/src/UnitTests/Algorithms/MultireductionTest.h @@ -50,8 +50,8 @@ class MultireductionTest : public ::testing::Test protected: using DeviceVector = Vector; using DeviceView = VectorView< typename Vector::RealType, typename Vector::DeviceType, typename Vector::IndexType >; - using HostVector = typename DeviceVector::HostType; - using HostView = typename DeviceView::HostType; + using HostVector = typename DeviceVector::template Self< typename DeviceVector::RealType, Devices::Sequential >; + using HostView = typename DeviceView::template Self< typename DeviceView::RealType, 
Devices::Sequential >; // should be small enough to have fast tests, but larger than minGPUReductionDataSize // and large enough to require multiple CUDA blocks for reduction diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index 69dd2d252..ef3119365 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -367,9 +367,10 @@ TYPED_TEST( ArrayTest, containsOnlyValue ) TYPED_TEST( ArrayTest, comparisonOperator ) { using ArrayType = typename TestFixture::ArrayType; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; ArrayType u( 10 ), v( 10 ), w( 10 ); - typename ArrayType::HostType u_host( 10 ); + HostArrayType u_host( 10 ); for( int i = 0; i < 10; i ++ ) { u.setElement( i, i ); u_host.setElement( i, i ); @@ -422,9 +423,10 @@ TYPED_TEST( ArrayTest, comparisonOperatorWithDifferentType ) TYPED_TEST( ArrayTest, assignmentOperator ) { using ArrayType = typename TestFixture::ArrayType; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; ArrayType u( 10 ), v( 10 ); - typename ArrayType::HostType u_host( 10 ); + HostArrayType u_host( 10 ); for( int i = 0; i < 10; i++ ) { u.setElement( i, i ); u_host.setElement( i, i ); @@ -451,10 +453,12 @@ template< typename ArrayType, typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type > void testArrayAssignmentWithDifferentType() { + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; + ArrayType u( 10 ); Array< short, typename ArrayType::DeviceType, short > v( 10 ); Array< short, Devices::Host, short > v_host( 10 ); - typename ArrayType::HostType u_host( 10 ); + HostArrayType u_host( 10 ); for( int i = 0; i < 10; i++ ) { u.setElement( i, i ); u_host.setElement( i, i ); diff --git a/src/UnitTests/Containers/ArrayViewTest.h 
b/src/UnitTests/Containers/ArrayViewTest.h index 6c8465bd5..e5a9d5a20 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -336,9 +336,10 @@ TYPED_TEST( ArrayViewTest, comparisonOperator ) { using ArrayType = typename TestFixture::ArrayType; using ViewType = typename TestFixture::ViewType; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; ArrayType a( 10 ), b( 10 ); - typename ArrayType::HostType a_host( 10 ); + HostArrayType a_host( 10 ); for( int i = 0; i < 10; i ++ ) { a.setElement( i, i ); a_host.setElement( i, i ); @@ -411,9 +412,11 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) using ArrayType = typename TestFixture::ArrayType; using ViewType = typename TestFixture::ViewType; using ConstViewType = VectorView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; + using HostViewType = typename HostArrayType::ViewType; ArrayType a( 10 ), b( 10 ); - typename ArrayType::HostType a_host( 10 ); + HostArrayType a_host( 10 ); for( int i = 0; i < 10; i++ ) { a.setElement( i, i ); a_host.setElement( i, i ); @@ -421,7 +424,7 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) ViewType u = a.getView(); ViewType v = b.getView(); - typename ViewType::HostType u_host = a_host.getView(); + HostViewType u_host = a_host.getView(); v.setValue( 0 ); v = u; @@ -457,21 +460,25 @@ template< typename ArrayType, typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type > void testArrayAssignmentWithDifferentType() { + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; + ArrayType a( 10 ); Array< short, typename ArrayType::DeviceType, short > b( 10 ); - Array< short, Devices::Host, short > b_host( 10 ); - 
typename ArrayType::HostType a_host( 10 ); + Array< short, Devices::Sequential, short > b_host( 10 ); + HostArrayType a_host( 10 ); for( int i = 0; i < 10; i++ ) { a.setElement( i, i ); a_host.setElement( i, i ); } using ViewType = ArrayView< typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; + using HostViewType = typename ViewType::template Self< typename ViewType::ValueType, Devices::Sequential >; ViewType u = a.getView(); - typename ViewType::HostType u_host( a_host ); + HostViewType u_host( a_host ); using ShortViewType = ArrayView< short, typename ArrayType::DeviceType, short >; + using HostShortViewType = ArrayView< short, Devices::Sequential, short >; ShortViewType v( b ); - typename ShortViewType::HostType v_host( b_host ); + HostShortViewType v_host( b_host ); v.setValue( 0 ); v = u; diff --git a/src/UnitTests/Containers/DistributedVectorTest.h b/src/UnitTests/Containers/DistributedVectorTest.h index b1844be3b..69dd543de 100644 --- a/src/UnitTests/Containers/DistributedVectorTest.h +++ b/src/UnitTests/Containers/DistributedVectorTest.h @@ -42,12 +42,13 @@ protected: using DistributedVectorType = DistributedVector; using VectorViewType = typename DistributedVectorType::LocalViewType; using DistributedVectorView = Containers::DistributedVectorView< RealType, DeviceType, IndexType, CommunicatorType >; + using HostDistributedVectorType = typename DistributedVectorType::template Self< RealType, Devices::Sequential >; const typename CommunicatorType::CommunicationGroup group = CommunicatorType::AllGroup; DistributedVectorType v; DistributedVectorView v_view; - typename DistributedVectorType::HostType v_host; + HostDistributedVectorType v_host; const int rank = CommunicatorType::GetRank(group); const int nproc = CommunicatorType::GetSize(group); diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h index 93283483c..bae5ce5f3 100644 --- 
a/src/UnitTests/Containers/VectorBinaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h @@ -595,8 +595,11 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices ) { SETUP_BINARY_TEST_ALIASES; - typename TestFixture::RightVector::HostType _R1_h; _R1_h = this->_R1; - typename TestFixture::Right::HostType R1_h( _R1_h ); + using RightHostVector = typename TestFixture::RightVector::Self< typename TestFixture::RightVector::RealType, Devices::Sequential >; + using RightHost = typename TestFixture::Right::Self< typename TestFixture::Right::RealType, Devices::Sequential >; + + RightHostVector _R1_h; _R1_h = this->_R1; + RightHost R1_h( _R1_h ); // L1 and L2 are device vectors EXPECT_EQ( L1, R1_h ); diff --git a/src/UnitTests/Containers/VectorHelperFunctions.h b/src/UnitTests/Containers/VectorHelperFunctions.h index b1a596c6a..4e8c64fae 100644 --- a/src/UnitTests/Containers/VectorHelperFunctions.h +++ b/src/UnitTests/Containers/VectorHelperFunctions.h @@ -9,7 +9,8 @@ void setLinearSequence( Vector& deviceVector ) #ifdef STATIC_VECTOR Vector a; #else - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); #endif #ifdef DISTRIBUTED_VECTOR @@ -34,7 +35,8 @@ void setConstantSequence( Vector& deviceVector, template< typename Vector > void setOscilatingLinearSequence( Vector& deviceVector ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); for( int i = 0; i < a.getSize(); i++ ) a[ i ] = i % 30 - 15; @@ -45,7 +47,8 @@ template< typename Vector > void setOscilatingConstantSequence( Vector& deviceVector, typename Vector::RealType v ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( 
deviceVector ); for( int i = 0; i < a.getSize(); i++ ) a[ i ] = TNL::sign( i % 30 - 15 ); @@ -55,7 +58,8 @@ void setOscilatingConstantSequence( Vector& deviceVector, template< typename Vector > void setNegativeLinearSequence( Vector& deviceVector ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); #ifdef DISTRIBUTED_VECTOR for( int i = 0; i < a.getLocalView().getSize(); i++ ) { @@ -76,7 +80,8 @@ void setOscilatingSequence( Vector& deviceVector, #ifdef STATIC_VECTOR Vector a; #else - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); #endif #ifdef DISTRIBUTED_VECTOR diff --git a/src/UnitTests/Containers/VectorPrefixSumTest.h b/src/UnitTests/Containers/VectorPrefixSumTest.h index be295001a..67281dba0 100644 --- a/src/UnitTests/Containers/VectorPrefixSumTest.h +++ b/src/UnitTests/Containers/VectorPrefixSumTest.h @@ -24,6 +24,7 @@ TYPED_TEST( VectorTest, prefixSum ) using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; + using HostVectorType = typename VectorType::template Self< RealType, Devices::Sequential >; const int size = VECTOR_TEST_SIZE; // FIXME: tests should work in all cases @@ -32,7 +33,7 @@ TYPED_TEST( VectorTest, prefixSum ) VectorType v( size ); ViewType v_view( v ); - typename VectorType::HostType v_host( size ); + HostVectorType v_host( size ); setConstantSequence( v, 0 ); v_host = -1; @@ -145,6 +146,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; + using HostVectorType = typename VectorType::template Self< RealType, Devices::Sequential >; const int size = 
VECTOR_TEST_SIZE; // FIXME: tests should work in all cases @@ -154,7 +156,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) VectorType v; v.setSize( size ); ViewType v_view( v ); - typename VectorType::HostType v_host( size ); + HostVectorType v_host( size ); setConstantSequence( v, 0 ); v_host = -1; diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.h b/src/UnitTests/Containers/VectorUnaryOperationsTest.h index 122404253..827147cd5 100644 --- a/src/UnitTests/Containers/VectorUnaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.h @@ -168,14 +168,16 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorType::RealType; \ using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \ + using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \ + using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \ constexpr int size = _size; \ using CommunicatorType = typename VectorOrView::CommunicatorType; \ const auto group = CommunicatorType::AllGroup; \ using LocalRangeType = typename VectorOrView::LocalRangeType; \ const LocalRangeType localRange = Partitioner< typename VectorOrView::IndexType, CommunicatorType >::splitRange( size, group ); \ \ - typename VectorType::HostType _V1h; \ - typename ExpectedVector::HostType expected_h; \ + HostVector _V1h; \ + HostExpectedVector expected_h; \ _V1h.setDistribution( localRange, size, group ); \ expected_h.setDistribution( localRange, size, group ); \ \ @@ -209,10 +211,12 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorType::RealType; \ using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \ + using HostVector = typename 
VectorType::template Self< RealType, Devices::Host >; \ + using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \ constexpr int size = _size; \ \ - typename VectorType::HostType _V1h( size ); \ - typename ExpectedVector::HostType expected_h( size ); \ + HostVector _V1h( size ); \ + HostExpectedVector expected_h( size ); \ \ const double h = (double) (end - begin) / size; \ for( int i = 0; i < size; i++ ) \ @@ -254,8 +258,8 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 ) using LeftVector = Vector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType >; using RightVector = Vector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType >; #endif - using LeftHostVector = typename LeftVector::HostType; - using RightHostVector = typename RightVector::HostType; + using LeftHostVector = typename LeftVector::template Self< LeftNonConstReal, Devices::Sequential >; + using RightHostVector = typename RightVector::template Self< RightNonConstReal, Devices::Sequential >; // first evaluate expressions LeftVector v1; v1 = _v1; diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h index 7c74e7704..93673a290 100644 --- a/src/UnitTests/Matrices/DistributedMatrixTest.h +++ b/src/UnitTests/Matrices/DistributedMatrixTest.h @@ -6,10 +6,22 @@ email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include +#include + +using namespace TNL; + template< typename Vector > void setLinearSequence( Vector& deviceVector, typename Vector::RealType offset = 0 ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Sequential >; + HostVector a; a.setLike( deviceVector ); for( int i = 0; i < a.getLocalView().getSize(); i++ ) { const auto gi = 
a.getLocalRange().getGlobalIndex( i ); @@ -21,8 +33,11 @@ void setLinearSequence( Vector& deviceVector, typename Vector::RealType offset = template< typename Matrix, typename RowLengths > void setMatrix( Matrix& matrix, const RowLengths& rowLengths ) { - typename Matrix::HostType hostMatrix; - typename RowLengths::HostType hostRowLengths; + using HostMatrix = Matrices::DistributedMatrix< typename Matrix::MatrixType::template Self< typename Matrix::RealType, TNL::Devices::Sequential >, typename Matrix::CommunicatorType >; + using HostRowLengths = typename RowLengths::template Self< typename RowLengths::RealType, TNL::Devices::Sequential >; + + HostMatrix hostMatrix; + HostRowLengths hostRowLengths; hostMatrix.setLike( matrix ); hostRowLengths = rowLengths; hostMatrix.setCompressedRowLengths( hostRowLengths ); @@ -36,17 +51,6 @@ void setMatrix( Matrix& matrix, const RowLengths& rowLengths ) matrix = hostMatrix; } -#ifdef HAVE_GTEST -#include - -#include -#include -#include -#include -#include - -using namespace TNL; - /* * Light check of DistributedMatrix. * diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 9de7b70a2..03b80259d 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -10,12 +10,6 @@ // TODO /* - * getType() ::HOW? How to test this for each format? edit string how? - * Found the mistake for Cuda instead of Devices::Cuda. Incorrect String in src/TNL/Devices/Cuda.cpp - * MISSING: indexType is missing in CSR_impl.h - * getTypeVirtual() ::TEST? This just calls getType(). - * getSerializationType() ::TEST? This just calls HostType::getType(). - * getSerializationTypeVirtual() ::TEST? This just calls getSerializationType(). * setDimensions() ::DONE * setCompressedRowLengths() ::DONE * getRowLength() ::USED! In test_SetCompressedRowLengths() to verify the test itself. 
-- GitLab From afba52d9805c5f8af2c0dfcafd4dc79e56571ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 13 Oct 2019 09:26:22 +0200 Subject: [PATCH 27/35] Renamed prefixSum methods to scan Closes #49 --- src/Benchmarks/BLAS/vector-operations.h | 32 +++---- src/TNL/Algorithms/DistributedScan.h | 2 +- src/TNL/Containers/DistributedVector.h | 2 +- src/TNL/Containers/DistributedVector.hpp | 2 +- src/TNL/Containers/DistributedVectorView.h | 2 +- src/TNL/Containers/DistributedVectorView.hpp | 2 +- src/TNL/Containers/Vector.h | 95 ++++++++++--------- src/TNL/Containers/Vector.hpp | 12 +-- src/TNL/Containers/VectorView.h | 95 ++++++++++--------- src/TNL/Containers/VectorView.hpp | 12 +-- src/TNL/Matrices/BiEllpack_impl.h | 2 +- src/TNL/Matrices/CSR_impl.h | 2 +- src/TNL/Matrices/ChunkedEllpack_impl.h | 2 +- .../Matrices/SlicedEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/SlicedEllpack_impl.h | 2 +- .../Containers/DistributedVectorTest.h | 54 +++++------ .../Containers/VectorPrefixSumTest.h | 52 +++++----- 17 files changed, 195 insertions(+), 177 deletions(-) diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index 5f5cd989f..7254ba9f4 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -562,31 +562,31 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif //// - // Inclusive prefix sum - auto inclusivePrefixSumHost = [&]() { - hostVector.prefixSum(); + // Inclusive scan + auto inclusiveScanHost = [&]() { + hostVector.scan(); }; - benchmark.setOperation( "inclusive prefix sum", 2 * datasetSize ); - benchmark.time< Devices::Host >( reset1, "CPU ET", inclusivePrefixSumHost ); + benchmark.setOperation( "inclusive scan", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset1, "CPU ET", inclusiveScanHost ); #ifdef HAVE_CUDA - auto inclusivePrefixSumCuda = [&]() { - deviceVector.prefixSum(); + auto inclusiveScanCuda = [&]() { + 
deviceVector.scan(); }; - benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusivePrefixSumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusiveScanCuda ); #endif //// - // Exclusive prefix sum - auto exclusivePrefixSumHost = [&]() { - hostVector.template prefixSum< Algorithms::ScanType::Exclusive >(); + // Exclusive scan + auto exclusiveScanHost = [&]() { + hostVector.template scan< Algorithms::ScanType::Exclusive >(); }; - benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); - benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); + benchmark.setOperation( "exclusive scan", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset1, "CPU ET", exclusiveScanHost ); #ifdef HAVE_CUDA - auto exclusivePrefixSumCuda = [&]() { - deviceVector.template prefixSum< Algorithms::ScanType::Exclusive >(); + auto exclusiveScanCuda = [&]() { + deviceVector.template scan< Algorithms::ScanType::Exclusive >(); }; - benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusiveScanCuda ); #endif #ifdef HAVE_CUDA diff --git a/src/TNL/Algorithms/DistributedScan.h b/src/TNL/Algorithms/DistributedScan.h index f294b0cf3..742acd5ed 100644 --- a/src/TNL/Algorithms/DistributedScan.h +++ b/src/TNL/Algorithms/DistributedScan.h @@ -54,7 +54,7 @@ struct DistributedScan // NOTE: exchanging general data types does not work with MPI CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); - // compute prefix-sum of the per-rank sums + // compute the scan of the per-rank sums Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); // perform second phase: shift by the per-block and per-rank offsets diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h index f1736b378..db4e46e68 100644 --- a/src/TNL/Containers/DistributedVector.h +++ 
b/src/TNL/Containers/DistributedVector.h @@ -131,7 +131,7 @@ public: DistributedVector& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index dbe9760f6..fa49591e8 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -273,7 +273,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVector< Real, Device, Index, Communicator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h index 47ad78836..70452c50d 100644 --- a/src/TNL/Containers/DistributedVectorView.h +++ b/src/TNL/Containers/DistributedVectorView.h @@ -134,7 +134,7 @@ public: DistributedVectorView& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 5669a52b6..70f61979f 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -261,7 +261,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVectorView< Real, Device, Index, Communicator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index c23154e94..be08266b6 100644 --- 
a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -244,75 +244,84 @@ public: Vector& operator/=( const VectorExpression& expression ); /** - * \brief Computes prefix sum of the vector elements. + * \brief Computes the scan (prefix sum) of the vector elements. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of the vector elements. + * \brief Computes the segmented scan (prefix sum) of the vector elements. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam FlagsArray is an array type describing beginnings of the segments. 
- * - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > - void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes prefix sum of the vector expression. + * \brief Computes the scan (prefix sum) of the vector expression. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * - * \param expression is the vector expression. - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. 
The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > - void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); + void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of a vector expression. + * \brief Computes the segmented scan (prefix sum) of a vector expression. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param expression is the vector expression. - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. 
*/ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > - void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index a5c20d596..5fdce0d09 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -146,7 +146,7 @@ template< typename Real, template< Algorithms::ScanType Type > void Vector< Real, Device, Index, Allocator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -161,7 +161,7 @@ template< typename Real, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: -segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -176,9 +176,9 @@ template< typename Real, typename VectorExpression > void Vector< Real, Device, Index, Allocator >:: -prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) +scan( const VectorExpression& expression, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." 
); } template< typename Real, @@ -190,9 +190,9 @@ template< typename Real, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: -segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." ); } } // namespace Containers diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 8200b0d39..1a144ea5c 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -214,75 +214,84 @@ public: VectorView& operator/=( const VectorExpression& expression ); /** - * \brief Computes prefix sum of the vector view elements. + * \brief Computes the scan (prefix sum) of the vector elements. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector view remain unchanged. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. 
*/ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of the vector view elements. + * \brief Computes the segmented scan (prefix sum) of the vector elements. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector view remain unchanged. Whole vector view is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > - void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes prefix sum of the vector expression. + * \brief Computes the scan (prefix sum) of the vector expression. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). 
- * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * - * \param expression is the vector expression. - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > - void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); + void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of a vector expression. + * \brief Computes the segmented scan (prefix sum) of a vector expression. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. 
- * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param expression is the vector expression. - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > - void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 490288e6b..2c1cd02c8 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -108,7 +108,7 @@ template< typename Real, template< Algorithms::ScanType Type > void VectorView< Real, Device, Index >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -122,7 +122,7 @@ template< typename Real, typename FlagsArray > void VectorView< Real, Device, Index >:: -segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -136,9 +136,9 @@ template< typename Real, typename VectorExpression > void 
VectorView< Real, Device, Index >:: -prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) +scan( const VectorExpression& expression, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." ); } template< typename Real, @@ -149,9 +149,9 @@ template< typename Real, typename FlagsArray > void VectorView< Real, Device, Index >:: -segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." ); } } // namespace Containers diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 53f61903e..51646152e 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -94,7 +94,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); - this->groupPointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->groupPointers.template scan< Algorithms::ScanType::Exclusive >(); // uncomment to perform structure test //DeviceDependentCode::verifyRowPerm( *this, rowLengths ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 9d8fd6456..327d25002 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -88,7 +88,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng rowPtrs.bind( this->rowPointers.getData(), 
this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); - this->rowPointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->rowPointers.template scan< Algorithms::ScanType::Exclusive >(); this->maxRowLength = max( rowLengths ); /**** diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 89e525e87..48119c659 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -232,7 +232,7 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre this->rowPointers.setElement( 0, 0 ); for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) this->setSlice( rowLengths, sliceIndex, elementsToAllocation ); - this->rowPointers.prefixSum(); + this->rowPointers.scan(); } // std::cout << "\ngetRowLength after first if: " << std::endl; diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 00df43cd4..c403fd4c8 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -58,7 +58,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->slicePointers.template scan< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index aa76caf7b..45e8cdee7 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -79,7 +79,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Algorithms::ScanType::Exclusive >(); + this->slicePointers.template scan< 
Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/UnitTests/Containers/DistributedVectorTest.h b/src/UnitTests/Containers/DistributedVectorTest.h index 69dd543de..2a1834f31 100644 --- a/src/UnitTests/Containers/DistributedVectorTest.h +++ b/src/UnitTests/Containers/DistributedVectorTest.h @@ -54,7 +54,7 @@ protected: const int nproc = CommunicatorType::GetSize(group); // should be small enough to have fast tests, but large enough to test - // prefix-sum with multiple CUDA grids + // scan with multiple CUDA grids const int globalSize = 10000 * nproc; DistributedVectorTest() @@ -80,7 +80,7 @@ using DistributedVectorTypes = ::testing::Types< TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes ); -TYPED_TEST( DistributedVectorTest, prefixSum ) +TYPED_TEST( DistributedVectorTest, scan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; @@ -97,21 +97,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -119,21 +119,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; 
setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -147,7 +147,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -155,7 +155,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -163,7 +163,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -172,7 +172,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -180,7 +180,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( 
Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -188,7 +188,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -199,7 +199,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) } } -TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) +TYPED_TEST( DistributedVectorTest, exclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; @@ -216,21 +216,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -238,21 +238,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< 
Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -266,7 +266,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -274,7 +274,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -282,7 +282,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< 
Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -291,7 +291,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -299,7 +299,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -307,7 +307,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) diff --git a/src/UnitTests/Containers/VectorPrefixSumTest.h b/src/UnitTests/Containers/VectorPrefixSumTest.h index 67281dba0..7f2151c5e 100644 --- a/src/UnitTests/Containers/VectorPrefixSumTest.h +++ b/src/UnitTests/Containers/VectorPrefixSumTest.h @@ -17,7 +17,7 @@ // and large enough to require multiple CUDA blocks for reduction constexpr int VECTOR_TEST_SIZE = 10000; -TYPED_TEST( VectorTest, prefixSum ) +TYPED_TEST( VectorTest, scan ) { using VectorType = typename 
TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; @@ -37,21 +37,21 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -59,21 +59,21 @@ TYPED_TEST( VectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -87,7 +87,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -95,7 +95,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) @@ -103,7 +103,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< 
Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -112,7 +112,7 @@ TYPED_TEST( VectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -120,7 +120,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) @@ -128,7 +128,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -160,21 +160,21 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -182,21 +182,21 @@ TYPED_TEST( VectorTest, exclusiveScan ) // test views setConstantSequence( v, 0 ); v_host = -1; - 
v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -210,7 +210,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -218,7 +218,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -226,7 +226,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -235,7 +235,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) // test views setConstantSequence( v, 0 
); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -243,7 +243,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -251,7 +251,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -262,7 +262,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) } } -// TODO: test prefix sum with custom begin and end parameters +// TODO: test scan with custom begin and end parameters template< typename FlagsView > -- GitLab From 87bf36052ded07dc9172e3815f60793e084ca481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 13 Oct 2019 09:45:27 +0200 Subject: [PATCH 28/35] Updated documentation in README.md --- README.md | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a8a4b749e..371782d71 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,20 @@ Similarly to the STL, features provided by the TNL can be grouped into several modules: - _Core concepts_. - The main concept used in the TNL is the `Device` type which is used in most of - the other parts of the library. 
For data structures such as `Array` it - specifies where the data should be allocated, whereas for algorithms such as - `ParallelFor` it specifies how the algorithm should be executed. + The main concepts used in TNL are the _memory space_, which represents the + part of memory where given data is allocated, and the _execution model_, + which represents the way how given (typically parallel) algorithm is executed. + For example, data can be allocated in the main system memory, in the GPU + memory, or using the CUDA Unified Memory which can be accessed from the host + as well as from the GPU. On the other hand, algorithms can be executed using + either the host CPU or an accelerator (GPU), and for each there are many ways + to manage parallel execution. The usage of memory spaces is abstracted with + [allocators][allocators] and the execution model is represented by + [devices][devices]. See the [Core concepts][core concepts] page for details. - _[Containers][containers]_. TNL provides generic containers such as array, multidimensional array or array - views, which abstract data management on different hardware architectures. + views, which abstract data management and execution of common operations on + different hardware architectures. - _Linear algebra._ TNL provides generic data structures and algorithms for linear algebra, such as [vectors][vectors], [sparse matrices][matrices], @@ -39,6 +46,9 @@ several modules: [libpng](http://www.libpng.org/pub/png/libpng.html) for PNG files, or [libjpeg](http://libjpeg.sourceforge.net/) for JPEG files. 
+[allocators]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Allocators.html +[devices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Devices.html +[core concepts]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/core_concepts.html [containers]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Containers.html [vectors]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/classTNL_1_1Containers_1_1Vector.html [matrices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Matrices.html -- GitLab From 1c31eac955a47a63233c0e8b7e714c35c1c80cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 18 Oct 2019 15:05:44 +0200 Subject: [PATCH 29/35] Swapped template parameters for methods in Meshes::Traverser so that UserData can be deduced --- src/TNL/Meshes/MeshDetails/Traverser_impl.h | 36 +++++++++---------- src/TNL/Meshes/Traverser.h | 40 ++++++++++----------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h index 33832d4f1..b1d4233d9 100644 --- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h +++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h @@ -22,12 +22,12 @@ namespace Meshes { template< typename Mesh, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh, MeshEntity, EntitiesDimension >:: processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >(); #ifdef HAVE_OPENMP @@ -44,12 +44,12 @@ processBoundaryEntities( const MeshPointer& meshPointer, template< typename Mesh, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< 
Mesh, MeshEntity, EntitiesDimension >:: processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >(); #ifdef HAVE_OPENMP @@ -66,12 +66,12 @@ processInteriorEntities( const MeshPointer& meshPointer, template< typename Mesh, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh, MeshEntity, EntitiesDimension >:: processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >(); #ifdef HAVE_OPENMP @@ -149,12 +149,12 @@ MeshTraverserAllEntitiesKernel( const Mesh* mesh, template< typename MeshConfig, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { #ifdef HAVE_CUDA auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >(); @@ -180,12 +180,12 @@ processBoundaryEntities( const MeshPointer& meshPointer, template< typename MeshConfig, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { #ifdef HAVE_CUDA auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >(); @@ -211,12 +211,12 
@@ processInteriorEntities( const MeshPointer& meshPointer, template< typename MeshConfig, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { #ifdef HAVE_CUDA auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >(); diff --git a/src/TNL/Meshes/Traverser.h b/src/TNL/Meshes/Traverser.h index 017084ae8..4cf707386 100644 --- a/src/TNL/Meshes/Traverser.h +++ b/src/TNL/Meshes/Traverser.h @@ -23,23 +23,23 @@ class Traverser { public: using MeshType = Mesh; - using MeshPointer = Pointers::SharedPointer< MeshType >; + using MeshPointer = Pointers::SharedPointer< MeshType >; using DeviceType = typename MeshType::DeviceType; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; }; template< typename MeshConfig, @@ -49,23 +49,23 @@ class Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimensio { public: using MeshType = Mesh< MeshConfig, Devices::Cuda >; - using MeshPointer = Pointers::SharedPointer< MeshType >; + using MeshPointer = Pointers::SharedPointer< 
MeshType >; using DeviceType = typename MeshType::DeviceType; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; }; } // namespace Meshes -- GitLab From 11ba9c9f1f009b0c2ec3deb5e3911a9324bd4e0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 18 Oct 2019 15:07:08 +0200 Subject: [PATCH 30/35] Added MeshTraverserTest --- src/UnitTests/Meshes/CMakeLists.txt | 9 + src/UnitTests/Meshes/MeshTraverserTest.cpp | 1 + src/UnitTests/Meshes/MeshTraverserTest.cu | 1 + src/UnitTests/Meshes/MeshTraverserTest.h | 434 +++++++++++++++++++++ 4 files changed, 445 insertions(+) create mode 100644 src/UnitTests/Meshes/MeshTraverserTest.cpp create mode 100644 src/UnitTests/Meshes/MeshTraverserTest.cu create mode 100644 src/UnitTests/Meshes/MeshTraverserTest.h diff --git a/src/UnitTests/Meshes/CMakeLists.txt b/src/UnitTests/Meshes/CMakeLists.txt index c71bde352..91bf37215 100644 --- a/src/UnitTests/Meshes/CMakeLists.txt +++ b/src/UnitTests/Meshes/CMakeLists.txt @@ -10,6 +10,10 @@ if( ${BUILD_CUDA} AND ${CUDA_VERSION_MAJOR} GREATER_EQUAL 9 ) OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( MeshTraverserTest MeshTraverserTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MeshTraverserTest ${GTEST_BOTH_LIBRARIES} ) + 
CUDA_ADD_EXECUTABLE( MeshOrderingTest MeshOrderingTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshOrderingTest ${GTEST_BOTH_LIBRARIES} ) @@ -18,6 +22,10 @@ else() TARGET_COMPILE_OPTIONS( MeshTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MeshTraverserTest MeshTraverserTest.cpp ) + TARGET_COMPILE_OPTIONS( MeshTraverserTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MeshTraverserTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MeshOrderingTest MeshOrderingTest.cpp ) TARGET_COMPILE_OPTIONS( MeshOrderingTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshOrderingTest ${GTEST_BOTH_LIBRARIES} ) @@ -30,6 +38,7 @@ TARGET_LINK_LIBRARIES( MeshEntityTest ${GTEST_BOTH_LIBRARIES} ) ADD_TEST( BoundaryTagsTest ${EXECUTABLE_OUTPUT_PATH}/BoundaryTagsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MeshTest ${EXECUTABLE_OUTPUT_PATH}/MeshTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MeshTraverserTest ${EXECUTABLE_OUTPUT_PATH}/MeshTraverserTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MeshOrderingTest ${EXECUTABLE_OUTPUT_PATH}/MeshOrderingTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MeshEntityTest ${EXECUTABLE_OUTPUT_PATH}/MeshEntityTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Meshes/MeshTraverserTest.cpp b/src/UnitTests/Meshes/MeshTraverserTest.cpp new file mode 100644 index 000000000..426d5fef5 --- /dev/null +++ b/src/UnitTests/Meshes/MeshTraverserTest.cpp @@ -0,0 +1 @@ +#include "MeshTraverserTest.h" diff --git a/src/UnitTests/Meshes/MeshTraverserTest.cu b/src/UnitTests/Meshes/MeshTraverserTest.cu new file mode 100644 index 000000000..426d5fef5 --- /dev/null +++ b/src/UnitTests/Meshes/MeshTraverserTest.cu @@ -0,0 +1 @@ +#include "MeshTraverserTest.h" diff --git a/src/UnitTests/Meshes/MeshTraverserTest.h b/src/UnitTests/Meshes/MeshTraverserTest.h new file mode 100644 index 000000000..b6c8208ad --- /dev/null +++ b/src/UnitTests/Meshes/MeshTraverserTest.h @@ -0,0 +1,434 @@ +#pragma 
once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace MeshTest { + +using namespace TNL; +using namespace TNL::Meshes; + +using RealType = double; +using Device = Devices::Host; +using IndexType = int; + +static const char* TEST_FILE_NAME = "test_MeshTest.tnl"; + +// FIXME: Traverser does not work with Id = void +//class TestQuadrilateralMeshConfig : public DefaultConfig< Topologies::Quadrilateral > +class TestQuadrilateralMeshConfig : public DefaultConfig< Topologies::Quadrilateral, 2, double, int, int, int > +{ +public: + static constexpr bool entityStorage( int dimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityStorage( EntityTopology, int SubentityDimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityOrientationStorage( EntityTopology, int SubentityDimensions ) { return ( SubentityDimensions % 2 != 0 ); } + template< typename EntityTopology > static constexpr bool superentityStorage( EntityTopology, int SuperentityDimensions ) { return true; } +}; + +// FIXME: Traverser does not work with Id = void +//class TestHexahedronMeshConfig : public DefaultConfig< Topologies::Hexahedron > +class TestHexahedronMeshConfig : public DefaultConfig< Topologies::Hexahedron, 3, double, int, int, int > +{ +public: + static constexpr bool entityStorage( int dimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityStorage( EntityTopology, int SubentityDimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityOrientationStorage( EntityTopology, int SubentityDimensions ) { return ( SubentityDimensions % 2 != 0 ); } + template< typename EntityTopology > static constexpr bool superentityStorage( EntityTopology, int SuperentityDimensions ) { return true; } +}; + +struct TestEntitiesProcessor +{ + template< typename Mesh, typename UserData, 
typename Entity > + __cuda_callable__ + static void processEntity( const Mesh& mesh, UserData& userData, const Entity& entity ) + { + userData[ entity.getIndex() ] += 1; + } +}; + +template< typename EntityType, typename DeviceMeshPointer, typename HostArray > +void testCudaTraverser( const DeviceMeshPointer& deviceMeshPointer, + const HostArray& host_array_boundary, + const HostArray& host_array_interior, + const HostArray& host_array_all ) +{ + using MeshType = typename DeviceMeshPointer::ObjectType; + Traverser< MeshType, EntityType > traverser; + + Containers::Array< int, Devices::Cuda > array_boundary( deviceMeshPointer->template getEntitiesCount< EntityType >() ); + Containers::Array< int, Devices::Cuda > array_interior( deviceMeshPointer->template getEntitiesCount< EntityType >() ); + Containers::Array< int, Devices::Cuda > array_all ( deviceMeshPointer->template getEntitiesCount< EntityType >() ); + + array_boundary.setValue( 0 ); + array_interior.setValue( 0 ); + array_all .setValue( 0 ); + + traverser.template processBoundaryEntities< TestEntitiesProcessor >( deviceMeshPointer, array_boundary.getView() ); + traverser.template processInteriorEntities< TestEntitiesProcessor >( deviceMeshPointer, array_interior.getView() ); + traverser.template processAllEntities < TestEntitiesProcessor >( deviceMeshPointer, array_all.getView() ); + + EXPECT_EQ( array_boundary, host_array_boundary ); + EXPECT_EQ( array_interior, host_array_interior ); + EXPECT_EQ( array_all, host_array_all ); +} + +TEST( MeshTest, RegularMeshOfQuadrilateralsTest ) +{ + using QuadrilateralMeshEntityType = MeshEntity< TestQuadrilateralMeshConfig, Devices::Host, Topologies::Quadrilateral >; + using EdgeMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 1 >::SubentityType; + using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType; + + using PointType = typename VertexMeshEntityType::PointType; + static_assert( std::is_same< 
PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); + + const IndexType xSize( 3 ), ySize( 4 ); + const RealType width( 1.0 ), height( 1.0 ); + const RealType hx( width / ( RealType ) xSize ), + hy( height / ( RealType ) ySize ); + const IndexType numberOfCells = xSize * ySize; + const IndexType numberOfVertices = ( xSize + 1 ) * ( ySize + 1 ); + + using TestQuadrilateralMesh = Mesh< TestQuadrilateralMeshConfig >; + Pointers::SharedPointer< TestQuadrilateralMesh > meshPointer; + MeshBuilder< TestQuadrilateralMesh > meshBuilder; + meshBuilder.setPointsCount( numberOfVertices ); + meshBuilder.setCellsCount( numberOfCells ); + + /**** + * Setup vertices + */ + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + meshBuilder.setPoint( j * ( xSize + 1 ) + i, PointType( i * hx, j * hy ) ); + + /**** + * Setup cells + */ + IndexType cellIdx( 0 ); + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType vertex0 = j * ( xSize + 1 ) + i; + const IndexType vertex1 = j * ( xSize + 1 ) + i + 1; + const IndexType vertex2 = ( j + 1 ) * ( xSize + 1 ) + i + 1; + const IndexType vertex3 = ( j + 1 ) * ( xSize + 1 ) + i; + + meshBuilder.getCellSeed( cellIdx ).setCornerId( 0, vertex0 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 1, vertex1 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 2, vertex2 ); + meshBuilder.getCellSeed( cellIdx++ ).setCornerId( 3, vertex3 ); + } + + ASSERT_TRUE( meshBuilder.build( *meshPointer ) ); + + // traversers for all test cases + Traverser< TestQuadrilateralMesh, QuadrilateralMeshEntityType > traverser_cells; + Traverser< TestQuadrilateralMesh, EdgeMeshEntityType > traverser_edges; + Traverser< TestQuadrilateralMesh, VertexMeshEntityType > traverser_vertices; + + // arrays for all test cases + Containers::Array< int > array_cells_boundary( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > 
array_cells_interior( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > array_cells_all ( meshPointer->template getEntitiesCount< 2 >() ); + + Containers::Array< int > array_edges_boundary( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_interior( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_all ( meshPointer->template getEntitiesCount< 1 >() ); + + Containers::Array< int > array_vertices_boundary( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_interior( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_all ( meshPointer->template getEntitiesCount< 0 >() ); + + // reset all arrays + array_cells_boundary.setValue( 0 ); + array_cells_interior.setValue( 0 ); + array_cells_all .setValue( 0 ); + + array_edges_boundary.setValue( 0 ); + array_edges_interior.setValue( 0 ); + array_edges_all .setValue( 0 ); + + array_vertices_boundary.setValue( 0 ); + array_vertices_interior.setValue( 0 ); + array_vertices_all .setValue( 0 ); + + // traverse for all test cases + traverser_cells.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_cells_boundary.getView() ); + traverser_cells.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_cells_interior.getView() ); + traverser_cells.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_cells_all.getView() ); + + traverser_edges.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_edges_boundary.getView() ); + traverser_edges.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_edges_interior.getView() ); + traverser_edges.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_edges_all.getView() ); + + traverser_vertices.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, 
array_vertices_boundary.getView() ); + traverser_vertices.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_vertices_interior.getView() ); + traverser_vertices.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_vertices_all.getView() ); + + // test traversing cells + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType idx = j * xSize + i; + if( j == 0 || j == ySize - 1 || i == 0 || i == xSize - 1 ) { + EXPECT_EQ( array_cells_boundary[ idx ], 1 ); + EXPECT_EQ( array_cells_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_cells_boundary[ idx ], 0 ); + EXPECT_EQ( array_cells_interior[ idx ], 1 ); + } + EXPECT_EQ( array_cells_all[ idx ], 1 ); + } + + // test traversing edges + // (edges are not numbered systematically, so we just compare with isBoundaryEntity) + for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 1 >(); idx++ ) + { + if( meshPointer->template isBoundaryEntity< 1 >( idx ) ) { + EXPECT_EQ( array_edges_boundary[ idx ], 1 ); + EXPECT_EQ( array_edges_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_edges_boundary[ idx ], 0 ); + EXPECT_EQ( array_edges_interior[ idx ], 1 ); + } + EXPECT_EQ( array_edges_all[ idx ], 1 ); + } + + // test traversing vertices + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + { + const IndexType idx = j * (xSize + 1) + i; + if( j == 0 || j == ySize || i == 0 || i == xSize ) { + EXPECT_EQ( array_vertices_boundary[ idx ], 1 ); + EXPECT_EQ( array_vertices_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_vertices_boundary[ idx ], 0 ); + EXPECT_EQ( array_vertices_interior[ idx ], 1 ); + } + EXPECT_EQ( array_vertices_all[ idx ], 1 ); + } + + // test traverser with CUDA +#ifdef HAVE_CUDA + using DeviceMesh = Mesh< TestQuadrilateralMeshConfig, Devices::Cuda >; + Pointers::SharedPointer< DeviceMesh > deviceMeshPointer; + *deviceMeshPointer = *meshPointer; + + 
testCudaTraverser< QuadrilateralMeshEntityType >( deviceMeshPointer, array_cells_boundary, array_cells_interior, array_cells_all ); + testCudaTraverser< EdgeMeshEntityType >( deviceMeshPointer, array_edges_boundary, array_edges_interior, array_edges_all ); + testCudaTraverser< VertexMeshEntityType >( deviceMeshPointer, array_vertices_boundary, array_vertices_interior, array_vertices_all ); +#endif +} + +TEST( MeshTest, RegularMeshOfHexahedronsTest ) +{ + using HexahedronMeshEntityType = MeshEntity< TestHexahedronMeshConfig, Devices::Host, Topologies::Hexahedron >; + using QuadrilateralMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 2 >::SubentityType; + using EdgeMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 1 >::SubentityType; + using VertexMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 0 >::SubentityType; + + using PointType = typename VertexMeshEntityType::PointType; + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); + + const IndexType xSize( 3 ), ySize( 4 ), zSize( 5 ); + const RealType width( 1.0 ), height( 1.0 ), depth( 1.0 ); + const RealType hx( width / ( RealType ) xSize ), + hy( height / ( RealType ) ySize ), + hz( depth / ( RealType ) zSize ); + const IndexType numberOfCells = xSize * ySize * zSize; + const IndexType numberOfVertices = ( xSize + 1 ) * ( ySize + 1 ) * ( zSize + 1 ); + + using TestHexahedronMesh = Mesh< TestHexahedronMeshConfig >; + Pointers::SharedPointer< TestHexahedronMesh > meshPointer; + MeshBuilder< TestHexahedronMesh > meshBuilder; + meshBuilder.setPointsCount( numberOfVertices ); + meshBuilder.setCellsCount( numberOfCells ); + + /**** + * Setup vertices + */ + for( IndexType k = 0; k <= zSize; k++ ) + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + meshBuilder.setPoint( k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i, PointType( i * hx, j * hy, k * hz ) 
); + + /**** + * Setup cells + */ + IndexType cellIdx( 0 ); + for( IndexType k = 0; k < zSize; k++ ) + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType vertex0 = k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i; + const IndexType vertex1 = k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i + 1; + const IndexType vertex2 = k * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i + 1; + const IndexType vertex3 = k * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i; + const IndexType vertex4 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i; + const IndexType vertex5 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i + 1; + const IndexType vertex6 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i + 1; + const IndexType vertex7 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i; + + meshBuilder.getCellSeed( cellIdx ).setCornerId( 0, vertex0 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 1, vertex1 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 2, vertex2 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 3, vertex3 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 4, vertex4 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 5, vertex5 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 6, vertex6 ); + meshBuilder.getCellSeed( cellIdx++ ).setCornerId( 7, vertex7 ); + } + + ASSERT_TRUE( meshBuilder.build( *meshPointer ) ); + + // traversers for all test cases + Traverser< TestHexahedronMesh, HexahedronMeshEntityType > traverser_cells; + Traverser< TestHexahedronMesh, QuadrilateralMeshEntityType > traverser_faces; + Traverser< TestHexahedronMesh, EdgeMeshEntityType > traverser_edges; + Traverser< TestHexahedronMesh, VertexMeshEntityType > traverser_vertices; + + // arrays for all test cases + Containers::Array< int > array_cells_boundary( meshPointer->template getEntitiesCount< 
3 >() ); + Containers::Array< int > array_cells_interior( meshPointer->template getEntitiesCount< 3 >() ); + Containers::Array< int > array_cells_all ( meshPointer->template getEntitiesCount< 3 >() ); + + Containers::Array< int > array_faces_boundary( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > array_faces_interior( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > array_faces_all ( meshPointer->template getEntitiesCount< 2 >() ); + + Containers::Array< int > array_edges_boundary( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_interior( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_all ( meshPointer->template getEntitiesCount< 1 >() ); + + Containers::Array< int > array_vertices_boundary( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_interior( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_all ( meshPointer->template getEntitiesCount< 0 >() ); + + // reset all arrays + array_cells_boundary.setValue( 0 ); + array_cells_interior.setValue( 0 ); + array_cells_all .setValue( 0 ); + + array_faces_boundary.setValue( 0 ); + array_faces_interior.setValue( 0 ); + array_faces_all .setValue( 0 ); + + array_edges_boundary.setValue( 0 ); + array_edges_interior.setValue( 0 ); + array_edges_all .setValue( 0 ); + + array_vertices_boundary.setValue( 0 ); + array_vertices_interior.setValue( 0 ); + array_vertices_all .setValue( 0 ); + + // traverse for all test cases + traverser_cells.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_cells_boundary.getView() ); + traverser_cells.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_cells_interior.getView() ); + traverser_cells.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_cells_all.getView() ); + + traverser_faces.template 
processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_faces_boundary.getView() ); + traverser_faces.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_faces_interior.getView() ); + traverser_faces.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_faces_all.getView() ); + + traverser_edges.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_edges_boundary.getView() ); + traverser_edges.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_edges_interior.getView() ); + traverser_edges.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_edges_all.getView() ); + + traverser_vertices.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_vertices_boundary.getView() ); + traverser_vertices.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_vertices_interior.getView() ); + traverser_vertices.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_vertices_all.getView() ); + + // test traversing cells + for( IndexType k = 0; k < zSize; k++ ) + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType idx = k * xSize * ySize + j * xSize + i; + if( k == 0 || k == zSize - 1 || j == 0 || j == ySize - 1 || i == 0 || i == xSize - 1 ) { + EXPECT_EQ( array_cells_boundary[ idx ], 1 ); + EXPECT_EQ( array_cells_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_cells_boundary[ idx ], 0 ); + EXPECT_EQ( array_cells_interior[ idx ], 1 ); + } + EXPECT_EQ( array_cells_all[ idx ], 1 ); + } + + // test traversing faces + // (faces are not numbered systematically, so we just compare with isBoundaryEntity) + for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 2 >(); idx++ ) + { + if( meshPointer->template isBoundaryEntity< 2 >( idx ) ) { + EXPECT_EQ( array_faces_boundary[ idx ], 1 ); + EXPECT_EQ( array_faces_interior[ idx ], 0 
); + } + else { + EXPECT_EQ( array_faces_boundary[ idx ], 0 ); + EXPECT_EQ( array_faces_interior[ idx ], 1 ); + } + EXPECT_EQ( array_faces_all[ idx ], 1 ); + } + + // test traversing edges + // (edges are not numbered systematically, so we just compare with isBoundaryEntity) + for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 1 >(); idx++ ) + { + if( meshPointer->template isBoundaryEntity< 1 >( idx ) ) { + EXPECT_EQ( array_edges_boundary[ idx ], 1 ); + EXPECT_EQ( array_edges_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_edges_boundary[ idx ], 0 ); + EXPECT_EQ( array_edges_interior[ idx ], 1 ); + } + EXPECT_EQ( array_edges_all[ idx ], 1 ); + } + + // test traversing vertices + for( IndexType k = 0; k <= zSize; k++ ) + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + { + const IndexType idx = k * (xSize + 1) * (ySize + 1) + j * (xSize + 1) + i; + if( k == 0 || k == zSize || j == 0 || j == ySize || i == 0 || i == xSize ) { + EXPECT_EQ( array_vertices_boundary[ idx ], 1 ); + EXPECT_EQ( array_vertices_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_vertices_boundary[ idx ], 0 ); + EXPECT_EQ( array_vertices_interior[ idx ], 1 ); + } + EXPECT_EQ( array_vertices_all[ idx ], 1 ); + } + + // test traverser with CUDA +#ifdef HAVE_CUDA + using DeviceMesh = Mesh< TestHexahedronMeshConfig, Devices::Cuda >; + Pointers::SharedPointer< DeviceMesh > deviceMeshPointer; + *deviceMeshPointer = *meshPointer; + + testCudaTraverser< HexahedronMeshEntityType >( deviceMeshPointer, array_cells_boundary, array_cells_interior, array_cells_all ); + testCudaTraverser< QuadrilateralMeshEntityType >( deviceMeshPointer, array_faces_boundary, array_faces_interior, array_faces_all ); + testCudaTraverser< EdgeMeshEntityType >( deviceMeshPointer, array_edges_boundary, array_edges_interior, array_edges_all ); + testCudaTraverser< VertexMeshEntityType >( deviceMeshPointer, array_vertices_boundary, array_vertices_interior, 
array_vertices_all ); +#endif +} + +} // namespace MeshTest + +#endif -- GitLab From e202036eeaab8771090cb86540e6c0a30a39c83b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 18 Oct 2019 16:13:23 +0200 Subject: [PATCH 31/35] Reimplemented mesh traverser using ParallelFor --- src/TNL/Meshes/MeshDetails/Traverser_impl.h | 232 +++++--------------- src/TNL/Meshes/Traverser.h | 28 +-- 2 files changed, 51 insertions(+), 209 deletions(-) diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h index b1d4233d9..f9308758c 100644 --- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h +++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h @@ -11,10 +11,7 @@ #pragma once #include - -#include -#include -#include +#include namespace TNL { namespace Meshes { @@ -29,16 +26,24 @@ Traverser< Mesh, MeshEntity, EntitiesDimension >:: processBoundaryEntities( const MeshPointer& meshPointer, UserData userData ) const { - auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >(); -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( decltype(entitiesCount) i = 0; i < entitiesCount; i++ ) { - const auto entityIndex = meshPointer->template getBoundaryEntityIndex< EntitiesDimension >( i ); - auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex ); + const GlobalIndexType entitiesCount = meshPointer->template getBoundaryEntitiesCount< MeshEntity::getEntityDimension() >(); + auto kernel = [] __cuda_callable__ + ( const GlobalIndexType i, + const Mesh* mesh, + UserData userData ) + { + const GlobalIndexType entityIndex = mesh->template getBoundaryEntityIndex< MeshEntity::getEntityDimension() >( i ); + auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex ); // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *meshPointer, userData, 
entity ); - } + EntitiesProcessor::processEntity( *mesh, userData, entity ); + }; + if( std::is_same< DeviceType, Devices::Cuda >::value ) + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Algorithms::ParallelFor< DeviceType >::exec( + (GlobalIndexType) 0, entitiesCount, + kernel, + &meshPointer.template getData< DeviceType >(), + userData ); } template< typename Mesh, @@ -51,16 +56,24 @@ Traverser< Mesh, MeshEntity, EntitiesDimension >:: processInteriorEntities( const MeshPointer& meshPointer, UserData userData ) const { - auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >(); -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( decltype(entitiesCount) i = 0; i < entitiesCount; i++ ) { - const auto entityIndex = meshPointer->template getInteriorEntityIndex< EntitiesDimension >( i ); - auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex ); + const auto entitiesCount = meshPointer->template getInteriorEntitiesCount< MeshEntity::getEntityDimension() >(); + auto kernel = [] __cuda_callable__ + ( const GlobalIndexType i, + const Mesh* mesh, + UserData userData ) + { + const GlobalIndexType entityIndex = mesh->template getInteriorEntityIndex< MeshEntity::getEntityDimension() >( i ); + auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex ); // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *meshPointer, userData, entity ); - } + EntitiesProcessor::processEntity( *mesh, userData, entity ); + }; + if( std::is_same< DeviceType, Devices::Cuda >::value ) + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Algorithms::ParallelFor< DeviceType >::exec( + (GlobalIndexType) 0, entitiesCount, + kernel, + &meshPointer.template getData< DeviceType >(), + userData ); } template< typename Mesh, @@ -73,170 +86,23 @@ Traverser< Mesh, MeshEntity, 
EntitiesDimension >:: processAllEntities( const MeshPointer& meshPointer, UserData userData ) const { - auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >(); -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( decltype(entitiesCount) entityIndex = 0; entityIndex < entitiesCount; entityIndex++ ) { - auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex ); - // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *meshPointer, userData, entity ); - } -} - - -#ifdef HAVE_CUDA -template< int EntitiesDimension, - typename EntitiesProcessor, - typename Mesh, - typename UserData > -__global__ void -MeshTraverserBoundaryEntitiesKernel( const Mesh* mesh, - UserData userData, - typename Mesh::GlobalIndexType entitiesCount ) -{ - for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x; - i < entitiesCount; - i += blockDim.x * gridDim.x ) + const auto entitiesCount = meshPointer->template getEntitiesCount< MeshEntity::getEntityDimension() >(); + auto kernel = [] __cuda_callable__ + ( const GlobalIndexType entityIndex, + const Mesh* mesh, + UserData userData ) { - const auto entityIndex = mesh->template getBoundaryEntityIndex< EntitiesDimension >( i ); - auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex ); + auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex ); // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex EntitiesProcessor::processEntity( *mesh, userData, entity ); - } -} - -template< int EntitiesDimension, - typename EntitiesProcessor, - typename Mesh, - typename UserData > -__global__ void -MeshTraverserInteriorEntitiesKernel( const Mesh* mesh, - UserData userData, - typename Mesh::GlobalIndexType entitiesCount ) -{ - for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x; - i < 
entitiesCount; - i += blockDim.x * gridDim.x ) - { - const auto entityIndex = mesh->template getInteriorEntityIndex< EntitiesDimension >( i ); - auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex ); - // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *mesh, userData, entity ); - } -} - -template< int EntitiesDimension, - typename EntitiesProcessor, - typename Mesh, - typename UserData > -__global__ void -MeshTraverserAllEntitiesKernel( const Mesh* mesh, - UserData userData, - typename Mesh::GlobalIndexType entitiesCount ) -{ - for( typename Mesh::GlobalIndexType entityIndex = blockIdx.x * blockDim.x + threadIdx.x; - entityIndex < entitiesCount; - entityIndex += blockDim.x * gridDim.x ) - { - auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex ); - // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *mesh, userData, entity ); - } -} -#endif - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > - template< typename EntitiesProcessor, - typename UserData > -void -Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: -processBoundaryEntities( const MeshPointer& meshPointer, - UserData userData ) const -{ -#ifdef HAVE_CUDA - auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >(); - - dim3 blockSize( 256 ); - dim3 gridSize; - const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - - Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); - MeshTraverserBoundaryEntitiesKernel< EntitiesDimension, EntitiesProcessor > - <<< gridSize, blockSize >>> - ( &meshPointer.template getData< Devices::Cuda >(), - userData, - entitiesCount ); - cudaDeviceSynchronize(); - 
TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > - template< typename EntitiesProcessor, - typename UserData > -void -Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: -processInteriorEntities( const MeshPointer& meshPointer, - UserData userData ) const -{ -#ifdef HAVE_CUDA - auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >(); - - dim3 blockSize( 256 ); - dim3 gridSize; - const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - - Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); - MeshTraverserInteriorEntitiesKernel< EntitiesDimension, EntitiesProcessor > - <<< gridSize, blockSize >>> - ( &meshPointer.template getData< Devices::Cuda >(), - userData, - entitiesCount ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > - template< typename EntitiesProcessor, - typename UserData > -void -Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: -processAllEntities( const MeshPointer& meshPointer, - UserData userData ) const -{ -#ifdef HAVE_CUDA - auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >(); - - dim3 blockSize( 256 ); - dim3 gridSize; - const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - - Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); - MeshTraverserAllEntitiesKernel< EntitiesDimension, EntitiesProcessor > - <<< gridSize, blockSize >>> - ( &meshPointer.template 
getData< Devices::Cuda >(), - userData, - entitiesCount ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif + }; + if( std::is_same< DeviceType, Devices::Cuda >::value ) + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Algorithms::ParallelFor< DeviceType >::exec( + (GlobalIndexType) 0, entitiesCount, + kernel, + &meshPointer.template getData< DeviceType >(), + userData ); } } // namespace Meshes diff --git a/src/TNL/Meshes/Traverser.h b/src/TNL/Meshes/Traverser.h index 4cf707386..f157e3afc 100644 --- a/src/TNL/Meshes/Traverser.h +++ b/src/TNL/Meshes/Traverser.h @@ -18,6 +18,7 @@ namespace Meshes { template< typename Mesh, typename MeshEntity, + // extra parameter which is used only for specializations implementing grid traversers int EntitiesDimension = MeshEntity::getEntityDimension() > class Traverser { @@ -25,6 +26,7 @@ class Traverser using MeshType = Mesh; using MeshPointer = Pointers::SharedPointer< MeshType >; using DeviceType = typename MeshType::DeviceType; + using GlobalIndexType = typename MeshType::GlobalIndexType; template< typename EntitiesProcessor, typename UserData > @@ -42,32 +44,6 @@ class Traverser UserData userData ) const; }; -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > -class Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension > -{ - public: - using MeshType = Mesh< MeshConfig, Devices::Cuda >; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using DeviceType = typename MeshType::DeviceType; - - template< typename EntitiesProcessor, - typename UserData > - void processBoundaryEntities( const MeshPointer& meshPointer, - UserData userData ) const; - - template< typename EntitiesProcessor, - typename UserData > - void processInteriorEntities( const MeshPointer& meshPointer, - UserData userData ) const; - - template< typename EntitiesProcessor, - typename UserData > - void processAllEntities( const 
MeshPointer& meshPointer, - UserData userData ) const; -}; - } // namespace Meshes } // namespace TNL -- GitLab From ef4cd475ea0958939f30093812a2abfc82ced0b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 19 Oct 2019 15:34:44 +0200 Subject: [PATCH 32/35] Added missing __cuda_callable__ to StaticArray and StaticVector methods --- src/TNL/Containers/StaticArray.h | 1 + src/TNL/Containers/StaticArray.hpp | 1 + src/TNL/Containers/StaticVector.h | 6 ++++++ src/TNL/Containers/StaticVector.hpp | 2 ++ 4 files changed, 10 insertions(+) diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index fa543b591..51ee05506 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -84,6 +84,7 @@ public: * * @param elems input initializer list */ + __cuda_callable__ StaticArray( const std::initializer_list< Value > &elems ); /** diff --git a/src/TNL/Containers/StaticArray.hpp b/src/TNL/Containers/StaticArray.hpp index ed7a2df0b..c1ac8e62a 100644 --- a/src/TNL/Containers/StaticArray.hpp +++ b/src/TNL/Containers/StaticArray.hpp @@ -120,6 +120,7 @@ StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value >& v ) } template< int Size, typename Value > +__cuda_callable__ StaticArray< Size, Value >::StaticArray( const std::initializer_list< Value > &elems) { auto it = elems.begin(); diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 640fa45de..2fe136ac9 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -53,11 +53,13 @@ public: /** * \brief Default copy-assignment operator. */ + __cuda_callable__ StaticVector& operator=( const StaticVector& ) = default; /** * \brief Default move-assignment operator. */ + __cuda_callable__ StaticVector& operator=( StaticVector&& ) = default; //! Constructors and assignment operators are inherited from the class \ref StaticArray. 
@@ -103,6 +105,7 @@ public: * \return reference to this vector */ template< typename VectorExpression > + __cuda_callable__ StaticVector& operator=( const VectorExpression& expression ); /** @@ -180,6 +183,7 @@ namespace TNL { namespace Containers { template< typename Real > +__cuda_callable__ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, const StaticVector< 3, Real >& v ) { @@ -191,6 +195,7 @@ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, } template< typename Real > +__cuda_callable__ Real TriangleArea( const StaticVector< 2, Real >& a, const StaticVector< 2, Real >& b, const StaticVector< 2, Real >& c ) @@ -208,6 +213,7 @@ Real TriangleArea( const StaticVector< 2, Real >& a, } template< typename Real > +__cuda_callable__ Real TriangleArea( const StaticVector< 3, Real >& a, const StaticVector< 3, Real >& b, const StaticVector< 3, Real >& c ) diff --git a/src/TNL/Containers/StaticVector.hpp b/src/TNL/Containers/StaticVector.hpp index b512a51a0..dc97eeea9 100644 --- a/src/TNL/Containers/StaticVector.hpp +++ b/src/TNL/Containers/StaticVector.hpp @@ -20,6 +20,7 @@ template< int Size, typename Real > template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ StaticVector< Size, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expr ) { detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr ); @@ -52,6 +53,7 @@ StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters, template< int Size, typename Real > template< typename VectorExpression > +__cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator=( const VectorExpression& expression ) { -- GitLab From e8cc0880358341ddf6604c2f73d7f2fb4ba9ec34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 25 Oct 2019 16:25:53 
+0200 Subject: [PATCH 33/35] Fixed internal linkage of the getHardwareMetadata function in benchmarks --- src/Benchmarks/Benchmarks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h index 67010118e..4caf0fbda 100644 --- a/src/Benchmarks/Benchmarks.h +++ b/src/Benchmarks/Benchmarks.h @@ -330,7 +330,7 @@ protected: }; -Benchmark::MetadataMap getHardwareMetadata() +inline Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); -- GitLab From 9615d107c21c11850be77bd6d3cb2fc6a6b6c2c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 8 Nov 2019 15:14:03 +0100 Subject: [PATCH 34/35] Fixed handling of Cuda::getTransferBufferSize() in memory operations --- .gitlab-ci.yml | 2 +- src/TNL/Algorithms/MemoryOperationsCuda.hpp | 5 ++-- .../Algorithms/MultiDeviceMemoryOperations.h | 29 ++++++++++--------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1c8f367c0..5882f50c9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -46,7 +46,7 @@ stages: - export CTEST_OUTPUT_ON_FAILURE=1 - export CTEST_PARALLEL_LEVEL=4 # enforce (more or less) warning-free builds - - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized" + - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" - mkdir -p "./builddir/$CI_JOB_NAME" - pushd "./builddir/$CI_JOB_NAME" - cmake ../.. 
diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp index a823f0ecb..ea4b92b61 100644 --- a/src/TNL/Algorithms/MemoryOperationsCuda.hpp +++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp @@ -92,11 +92,12 @@ copyFromIterator( DestinationElement* destination, SourceIterator last ) { using BaseType = typename std::remove_cv< DestinationElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize ); + std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] }; Index copiedElements = 0; while( copiedElements < destinationSize && first != last ) { Index i = 0; - while( i < Cuda::getTransferBufferSize() && first != last ) + while( i < buffer_size && first != last ) buffer[ i++ ] = *first++; MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i ); copiedElements += i; diff --git a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h index c0e75f2fa..48e5ad647 100644 --- a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h +++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h @@ -137,18 +137,19 @@ copy( DestinationElement* destination, else { using BaseType = typename std::remove_cv< SourceElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] }; - Index i( 0 ); + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size ); + std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] }; + Index i = 0; while( i < size ) { if( cudaMemcpy( (void*) buffer.get(), (void*) &source[ i ], - TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ), + TNL::min( size - i, buffer_size ) * sizeof(SourceElement), cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of data 
from CUDA device to host failed." << std::endl; TNL_CHECK_CUDA_DEVICE; - Index j( 0 ); - while( j < Cuda::getTransferBufferSize() && i + j < size ) + int j = 0; + while( j < buffer_size && i + j < size ) { destination[ i + j ] = buffer[ j ]; j++; @@ -180,14 +181,15 @@ compare( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); #ifdef HAVE_CUDA - std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] }; - Index compared( 0 ); + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(Element2), size ); + std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] }; + Index compared = 0; while( compared < size ) { - Index transfer = min( size - compared, Cuda::getTransferBufferSize() ); + const int transfer = TNL::min( size - compared, buffer_size ); if( cudaMemcpy( (void*) host_buffer.get(), (void*) &source[ compared ], - transfer * sizeof( Element2 ), + transfer * sizeof(Element2), cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of data from CUDA device to host failed." 
<< std::endl; TNL_CHECK_CUDA_DEVICE; @@ -230,12 +232,13 @@ copy( DestinationElement* destination, } else { - std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] }; - Index i( 0 ); + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(DestinationElement), size ); + std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] }; + Index i = 0; while( i < size ) { - Index j( 0 ); - while( j < Cuda::getTransferBufferSize() && i + j < size ) + int j = 0; + while( j < buffer_size && i + j < size ) { buffer[ j ] = source[ i + j ]; j++; -- GitLab From 9723c16bfa3ba16bad97e4902b47d27884768614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 8 Nov 2019 15:38:26 +0100 Subject: [PATCH 35/35] Moved skipping of synchronization directly into the synchronizeSmartPointersOnDevice function --- src/TNL/Meshes/MeshDetails/Traverser_impl.h | 9 +++------ src/TNL/Pointers/SmartPointersRegister.h | 6 ++++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h index f9308758c..2ce07addf 100644 --- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h +++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h @@ -37,8 +37,7 @@ processBoundaryEntities( const MeshPointer& meshPointer, // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex EntitiesProcessor::processEntity( *mesh, userData, entity ); }; - if( std::is_same< DeviceType, Devices::Cuda >::value ) - Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); Algorithms::ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, entitiesCount, kernel, @@ -67,8 +66,7 @@ processInteriorEntities( const MeshPointer& meshPointer, // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex EntitiesProcessor::processEntity( *mesh, 
userData, entity ); }; - if( std::is_same< DeviceType, Devices::Cuda >::value ) - Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); Algorithms::ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, entitiesCount, kernel, @@ -96,8 +94,7 @@ processAllEntities( const MeshPointer& meshPointer, // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex EntitiesProcessor::processEntity( *mesh, userData, entity ); }; - if( std::is_same< DeviceType, Devices::Cuda >::value ) - Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); Algorithms::ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, entitiesCount, kernel, diff --git a/src/TNL/Pointers/SmartPointersRegister.h b/src/TNL/Pointers/SmartPointersRegister.h index 5094c1c0e..7f261a28e 100644 --- a/src/TNL/Pointers/SmartPointersRegister.h +++ b/src/TNL/Pointers/SmartPointersRegister.h @@ -16,6 +16,8 @@ #include #include #include +#include +#include namespace TNL { namespace Pointers { @@ -109,6 +111,10 @@ Timer& getSmartPointersSynchronizationTimer() template< typename Device > bool synchronizeSmartPointersOnDevice( int deviceId = -1 ) { + // TODO: better way to skip synchronization of host-only smart pointers + if( std::is_same< Device, Devices::Sequential >::value || std::is_same< Device, Devices::Host >::value ) + return true; + getSmartPointersSynchronizationTimer< Device >().start(); bool b = getSmartPointersRegister< Device >().synchronizeDevice( deviceId ); getSmartPointersSynchronizationTimer< Device >().stop(); -- GitLab