diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d260486c83a221fb34a161103ad9e5686f98d2d7..5882f50c99e56ca1a178c21507c8bef0e42684c7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -21,7 +21,6 @@ stages:
     WITH_OPENMP: "no"
     WITH_CUDA: "no"
     WITH_CUDA_ARCH: "auto"
-    WITH_MIC: "no"
     WITH_MPI: "no"
     # configurations
     WITH_TESTS: "no"
@@ -46,6 +45,8 @@ stages:
           fi
         - export CTEST_OUTPUT_ON_FAILURE=1
         - export CTEST_PARALLEL_LEVEL=4
+        # enforce (more or less) warning-free builds
+        - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
         - mkdir -p "./builddir/$CI_JOB_NAME"
         - pushd "./builddir/$CI_JOB_NAME"
         - cmake ../..
@@ -56,7 +57,6 @@ stages:
                 -DWITH_MPI=${WITH_MPI}
                 -DWITH_CUDA=${WITH_CUDA}
                 -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
-                -DWITH_MIC=${WITH_MIC}
                 -DWITH_TESTS=${WITH_TESTS}
                 -DWITH_DOC=${WITH_DOC}
                 -DWITH_COVERAGE=${WITH_COVERAGE}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9540fe0028c4647db0e16cb7c7864c119772c2fd..78c7f3dcd94c0113ed52b9ae6ffd35141a121c45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,6 @@ set( tnlVersion "0.1" )
 
 # declare all custom build options
 option(OFFLINE_BUILD "Offline build (i.e. without downloading libraries such as pybind11)" OFF)
-option(WITH_MIC "Build with MIC support" OFF)
 option(WITH_CUDA "Build with CUDA support" ON)
 set(WITH_CUDA_ARCH "auto" CACHE STRING "Build for these CUDA architectures")
 option(WITH_OPENMP "Build with OpenMP support" ON)
@@ -83,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
 set( CMAKE_CXX_EXTENSIONS OFF )
 
 # set Debug/Release options
-set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
+set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
 set( CMAKE_CXX_FLAGS_DEBUG "-g" )
 set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" )
 # pass -rdynamic only in Debug mode
@@ -120,22 +119,6 @@ if( NOT DEFINED ENV{CI_JOB_NAME} )
    endif()
 endif()
 
-if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
-   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570")
-   #####
-   #  Check for MIC
-   #
-   if( ${WITH_MIC} )
-      message( "Enabled MIC support." )
-      set( MIC_CXX_FLAGS "-DHAVE_MIC")
-      # build all tests with MIC support
-      set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_MIC )
-      set( WITH_CUDA OFF CACHE BOOL "Build with CUDA support" )
-   else()
-      set( MIC_CXX_FLAGS "")
-   endif()
-endif()
-
 # force colorized output in continuous integration
 if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
    message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
@@ -355,7 +338,6 @@ INCLUDE( CPack )
 # Print custom build options
 message( "-- Build options:" )
 message( "   OFFLINE_BUILD = ${OFFLINE_BUILD}" )
-message( "   WITH_MIC = ${WITH_MIC}" )
 message( "   WITH_CUDA = ${WITH_CUDA}" )
 message( "   WITH_CUDA_ARCH = ${WITH_CUDA_ARCH}" )
 message( "   WITH_OPENMP = ${WITH_OPENMP}" )
diff --git a/Documentation/Examples/FileExampleCuda.cu b/Documentation/Examples/FileExampleCuda.cu
index 0cfde8fc8bc3220cee4b7edae5532cd3ab204d70..4411b8c51bd0e1ed2285dfb2829b81416d35303f 100644
--- a/Documentation/Examples/FileExampleCuda.cu
+++ b/Documentation/Examples/FileExampleCuda.cu
@@ -17,7 +17,7 @@ int main()
     */
    File file;
    file.open( "file-example-cuda-test-file.tnl", std::ios_base::out | std::ios_base::trunc );
-   file.save< double, double, Devices::Host >( doubleArray, size );
+   file.save< double, double, Allocators::Host< double > >( doubleArray, size );
    file.close();
 
    /***
@@ -31,7 +31,7 @@ int main()
     * Read array from the file to device
     */
    file.open( "file-example-cuda-test-file.tnl", std::ios_base::in );
-   file.load< double, double, Devices::Cuda >( deviceArray, size );
+   file.load< double, double, Allocators::Cuda< double > >( deviceArray, size );
    file.close();
 
    /***
diff --git a/Documentation/Examples/FileExampleSaveAndLoad.cpp b/Documentation/Examples/FileExampleSaveAndLoad.cpp
index 00e353218b241e60659d78ad829ee78704b7641f..c232fc3fe7d76b13b6c488da369937f3a64c4f08 100644
--- a/Documentation/Examples/FileExampleSaveAndLoad.cpp
+++ b/Documentation/Examples/FileExampleSaveAndLoad.cpp
@@ -18,21 +18,21 @@ int main()
     */
    File file;
    file.open( "test-file.tnl", std::ios_base::out | std::ios_base::trunc );
-   file.save< double, float, Devices::Host >( doubleArray, size );
+   file.save< double, float >( doubleArray, size );
    file.close();
 
    /***
     * Load the array of floats from the file.
     */
    file.open( "test-file.tnl", std::ios_base::in );
-   file.load< float, float, Devices::Host >( floatArray, size );
+   file.load< float, float >( floatArray, size );
    file.close();
 
    /***
     * Load the array of floats from the file and convert them to integers.
     */
    file.open( "test-file.tnl", std::ios_base::in );
-   file.load< int, float, Devices::Host >( intArray, size );
+   file.load< int, float >( intArray, size );
    file.close();
 
    /***
diff --git a/Documentation/Examples/ObjectExample_getType.cpp b/Documentation/Examples/ObjectExample_getType.cpp
index 7cc7476d6cc90debc1e495eab8b84959619881f7..7c45a167d751812d37aa9eca3316c777ef70567a 100644
--- a/Documentation/Examples/ObjectExample_getType.cpp
+++ b/Documentation/Examples/ObjectExample_getType.cpp
@@ -1,5 +1,5 @@
 #include <iostream>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 #include <TNL/Object.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
@@ -13,24 +13,12 @@ class MyArray : public Object
 {
    public:
 
-      using HostType = MyArray< Value, Devices::Host >;
-      
-      static String getType()
-      {
-         return "MyArray< " + TNL::getType< Value >() + ", " + TNL::getType< Device >() + " >";
-      }
-
-      String getTypeVirtual() const
-      {
-         return getType();
-      }
-
       static String getSerializationType()
       {
-         return HostType::getType();
+         return "MyArray< " + TNL::getType< Value >() + ", " + getType< Devices::Host >() + " >";
       }
 
-      String getSerializationTypeVirtual() const
+      String getSerializationTypeVirtual() const override
       {
          return getSerializationType();
       }
@@ -47,11 +35,11 @@ int main()
    Object* cudaArrayPtr = &cudaArray;
 
    // Object types
-   cout << "HostArray type is                  " << HostArray::getType() << endl;
-   cout << "hostArrayPtr type is               " << hostArrayPtr->getTypeVirtual() << endl;
+   cout << "HostArray type is                  " << getType< HostArray >() << endl;
+   cout << "hostArrayPtr type is               " << getType( *hostArrayPtr ) << endl;
 
-   cout << "CudaArray type is                  " << CudaArray::getType() << endl;
-   cout << "cudaArrayPtr type is               " << cudaArrayPtr->getTypeVirtual() << endl;
+   cout << "CudaArray type is                  " << getType< CudaArray >() << endl;
+   cout << "cudaArrayPtr type is               " << getType( *cudaArrayPtr ) << endl;
 
    // Object serialization types
    cout << "HostArray serialization type is    " << HostArray::getSerializationType() << endl;
@@ -60,4 +48,3 @@ int main()
    cout << "CudaArray serialization type is    " << CudaArray::getSerializationType() << endl;
    cout << "cudaArrayPtr serialization type is " << cudaArrayPtr->getSerializationTypeVirtual() << endl;
 }
-
diff --git a/Documentation/Examples/StringExample.cpp b/Documentation/Examples/StringExample.cpp
index 609e2a26981362a663f2a92e6a82c6f86a94e41c..a86182d6574cde1d8ee2e65f2d0a14d251c837a7 100644
--- a/Documentation/Examples/StringExample.cpp
+++ b/Documentation/Examples/StringExample.cpp
@@ -1,6 +1,5 @@
 #include <iostream>
 #include <TNL/String.h>
-#include <TNL/Containers/List.h>
 #include <TNL/File.h>
 
 using namespace TNL;
diff --git a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp
index d0b66adb4febd515b5296d36909f551b58c8dc3b..4c3a17268cc1107aaca84d911cb1f4b4f5cb8a28 100644
--- a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp
index 9ccb5baa86c26dabb155226207b6dfd551c595dc..29817aa1427405142a2feb07362f9ad443fa4b39 100644
--- a/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp
@@ -4,7 +4,7 @@
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 void scan( Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp
index 8d21107fdc444872783c057a329f5c812f83527e..2fb76623836dc553d26d2f7e0fb49b0755b7ea79 100644
--- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double mapReduce( Vector< double, Device >& u )
diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp
index f44cac9180fb68543750be923e1b9bfc9cd85324..10fb0b49966fa2ccbb8b9eb6273ca726076499d2 100644
--- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp
@@ -1,12 +1,12 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 #include <TNL/Timer.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double mapReduce( Vector< double, Device >& u )
diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp
index 1125b605c8264b69377c2c5c1c31a385d7033db9..de8c4bab658c42f38a199c1f95075490101e3420 100644
--- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp
@@ -1,12 +1,12 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 #include <TNL/Timer.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double mapReduce( Vector< double, Device >& u )
diff --git a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp
index 8e4dbc740fe4070788509bcfd951dd759a98dea9..ca4b8c8a481ec49e23d7770923dd19c27316f7fa 100644
--- a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double maximumNorm( const Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp
index 8be11efa32111e298ba0616f8634ba359802c2d9..e2691e40a7544306322841a69047d9d5d0b52dee 100644
--- a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double product( const Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp
index e37a21b6e4f44d8ea4acce8a8c10a8cb1d9061c1..000af86feb4a0b96357a6ce2a4c1dd0c8829b5d3 100644
--- a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 std::pair< int, double >
diff --git a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp
index 5bfd759edcab7f70f3940918a03eb732f75a689b..c072e09ba4d2f1c48c051eb6979db5956376ccf6 100644
--- a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double scalarProduct( const Vector< double, Device >& u, const Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp
index 38f44ccddac7eea5a92395180804ebefb1bef17b..3dbd8581d1932933e20f3011b226fe1f3ce9bcf6 100644
--- a/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp
@@ -4,7 +4,7 @@
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 void scan( Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp
index b3f67763906d7770cb65cf36a637eb0379928736..5e1379f5d572007beeee1fdcc6671c1240cc8973 100644
--- a/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp
@@ -4,7 +4,7 @@
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags )
diff --git a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp
index 3be04bd9245c8aef312d421f2bb4d68e4aacbea2..3cf648a5774e17331b9716196939c6c988496ff2 100644
--- a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double sum( const Vector< double, Device >& v )
diff --git a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp
index bf93cd58b40e38a6d77df29f8aeb2c0d344664a9..4e44abe570bd4d701cd2712d8730374dbf940c24 100644
--- a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp
@@ -1,11 +1,11 @@
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename Device >
 double updateAndResidue( Vector< double, Device >& u, const Vector< double, Device >& delta_u, const double& tau )
diff --git a/Documentation/Tutorials/Vectors/Reduction.cpp b/Documentation/Tutorials/Vectors/Reduction.cpp
index 1d76d8d0405d815d01b9f5ddc155694d76e64067..33768b07f456e38e14b8bfadd7466233075de47f 100644
--- a/Documentation/Tutorials/Vectors/Reduction.cpp
+++ b/Documentation/Tutorials/Vectors/Reduction.cpp
@@ -24,7 +24,6 @@ void expressions()
    b.evaluate( [] __cuda_callable__ ( int i )->RealType { return i - 5.0; } );
    c = -5;
 
-   int arg;
    std::cout << "a = " << a << std::endl;
    std::cout << "b = " << b << std::endl;
    std::cout << "c = " << c << std::endl;
diff --git a/README.md b/README.md
index a8a4b749e4aa518e7501db2690c6a791691bba77..371782d714aa228b2cc3b125df21500bc5b8fe34 100644
--- a/README.md
+++ b/README.md
@@ -12,13 +12,20 @@ Similarly to the STL, features provided by the TNL can be grouped into
 several modules:
 
 - _Core concepts_.
-  The main concept used in the TNL is the `Device` type which is used in most of
-  the other parts of the library. For data structures such as `Array` it
-  specifies where the data should be allocated, whereas for algorithms such as
-  `ParallelFor` it specifies how the algorithm should be executed.
+  The main concepts used in TNL are the _memory space_, which represents the
+  part of memory where the given data is allocated, and the _execution model_,
+  which represents the way a given (typically parallel) algorithm is executed.
+  For example, data can be allocated in the main system memory, in the GPU
+  memory, or using the CUDA Unified Memory which can be accessed from the host
+  as well as from the GPU. On the other hand, algorithms can be executed using
+  either the host CPU or an accelerator (GPU), and for each there are many ways
+  to manage parallel execution. The usage of memory spaces is abstracted with
+  [allocators][allocators] and the execution model is represented by
+  [devices][devices]. See the [Core concepts][core concepts] page for details.
 - _[Containers][containers]_.
   TNL provides generic containers such as array, multidimensional array or array
-  views, which abstract data management on different hardware architectures.
+  views, which abstract data management and execution of common operations on
+  different hardware architectures.
 - _Linear algebra._
   TNL provides generic data structures and algorithms for linear algebra, such
   as [vectors][vectors], [sparse matrices][matrices],
@@ -39,6 +46,9 @@ several modules:
   [libpng](http://www.libpng.org/pub/png/libpng.html) for PNG files, or
   [libjpeg](http://libjpeg.sourceforge.net/) for JPEG files.
 
+[allocators]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Allocators.html
+[devices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Devices.html
+[core concepts]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/core_concepts.html
 [containers]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Containers.html
 [vectors]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/classTNL_1_1Containers_1_1Vector.html
 [matrices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Matrices.html
diff --git a/build b/build
index c1e0d3162a9585c41a1050d30cecc442fb8d2173..914c65b1971bd0b895f5c2aa6f093ee83dd132a2 100755
--- a/build
+++ b/build
@@ -48,7 +48,6 @@ do
         --offline-build                  ) OFFLINE_BUILD="yes" ;;
         --with-clang=*                   ) WITH_CLANG="${option#*=}" ;;
         --with-mpi=*                     ) WITH_MPI="${option#*=}" ;;
-        --with-mic=*                     ) WITH_MIC="${option#*=}" ;;
         --with-cuda=*                    ) WITH_CUDA="${option#*=}" ;;
         --with-cuda-arch=*               ) WITH_CUDA_ARCH="${option#*=}";;
         --with-openmp=*                  ) WITH_OPENMP="${option#*=}" ;;
@@ -78,7 +77,6 @@ if [[ ${HELP} == "yes" ]]; then
     echo "   --install=yes/no                      Enables the installation of TNL files."
     echo "   --offline-build=yes/no                Disables online updates during the build. 'no' by default."
     echo "   --with-mpi=yes/no                     Enables MPI. 'yes' by default (OpenMPI required)."
-    echo "   --with-mic=yes/no                     Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)."
     echo "   --with-cuda=yes/no                    Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
     echo "   --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default."
     echo "   --with-openmp=yes/no                  Enables OpenMP. 'yes' by default."
@@ -110,7 +108,12 @@ else
    export CC=gcc
 fi
 
-if hash ninja 2>/dev/null; then
+if [[ ! $(command -v cmake) ]]; then
+   echo "Error: cmake is not installed. See http://www.cmake.org/download/" >&2
+   exit 1
+fi
+
+if [[ $(command -v ninja) ]]; then
    generator=Ninja
    make=ninja
    check_file="build.ninja"
@@ -126,7 +129,6 @@ cmake_command=(
          -DCMAKE_BUILD_TYPE=${BUILD}
          -DCMAKE_INSTALL_PREFIX=${PREFIX}
          -DOFFLINE_BUILD=${OFFLINE_BUILD}
-         -DWITH_MIC=${WITH_MIC}
          -DWITH_CUDA=${WITH_CUDA}
          -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
          -DWITH_OPENMP=${WITH_OPENMP}
diff --git a/install b/install
index 6f0770367953585ee30ad634dd308b4a952eadeb..fe138dfaa005539a87e7ccbb9a8746143c4cbb0e 100755
--- a/install
+++ b/install
@@ -1,59 +1,51 @@
 #!/bin/bash
 
+set -e
+
 BUILD_DEBUG="yes"
 BUILD_RELEASE="yes"
 
 OPTIONS=""
 
-CMAKE_TEST=`which cmake`    
-if test x${CMAKE_TEST} = "x";
-then
-    echo "Cmake is not installed on your system. Please install it by:"
-    echo ""
-    echo "   sudo apt-get install cmake     on Ubuntu and Debian based systems"
-    echo "   sudo yum install cmake         on RedHat, Fedora or CentOS"
-    echo "   sudo zypper install cmake      on OpenSuse"
-    echo ""
-    echo "You may also install it from the source code at:"
-    echo " http://www.cmake.org/download/"
-    exit 1
-fi
-
-for option in "$@"
-do
-    case $option in
-        --no-debug                    ) BUILD_DEBUG="no" ;;
-        --no-release                  ) BUILD_RELEASE="no" ;;        
-        *                             ) OPTIONS="${OPTIONS} ${option}" ;;
-    esac
+for option in "$@"; do
+   case $option in
+      --no-debug)
+         BUILD_DEBUG="no"
+         ;;
+      --no-release)
+         BUILD_RELEASE="no"
+         ;;
+      --build=*)
+         BUILD="${option#*=}"
+         if [[ "$BUILD" != "Release" ]]; then
+            BUILD_RELEASE="no"
+         fi
+         if [[ "$BUILD" != "Debug" ]]; then
+            BUILD_DEBUG="no"
+         fi
+         ;;
+      *)
+         OPTIONS="${OPTIONS} ${option}"
+         ;;
+   esac
 done
 
-if test ${BUILD_DEBUG} = "yes";
-then
-    if [ ! -d Debug ];
-    then
-       mkdir Debug
-    fi
-    cd Debug
-    if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}
-    then
-       exit 1
-    fi
-    cd ..
+if [[ ${BUILD_DEBUG} == "yes" ]]; then
+   if [[ ! -d Debug ]]; then
+      mkdir Debug
+   fi
+   pushd Debug
+   ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}
+   popd
 fi
 
-if test ${BUILD_RELEASE} = "yes";
-then
-    if [ ! -d Release ];
-    then
-       mkdir Release
-    fi
-    cd Release
-    if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS};
-    then
-        exit 1
-    fi
-    cd ..
+if [[ ${BUILD_RELEASE} == "yes" ]]; then
+   if [[ ! -d Release ]]; then
+      mkdir Release
+   fi
+   pushd Release
+   ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}
+   popd
 fi
 
 
diff --git a/src/Benchmarks/BLAS/CommonVectorOperations.hpp b/src/Benchmarks/BLAS/CommonVectorOperations.hpp
index 640fda337b5d8a8a6dcec75f081702eccb45464c..13a0f63229dd45d2726318f77f3bd0a2b92519cd 100644
--- a/src/Benchmarks/BLAS/CommonVectorOperations.hpp
+++ b/src/Benchmarks/BLAS/CommonVectorOperations.hpp
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 #include "CommonVectorOperations.h"
 
 namespace TNL {
@@ -30,7 +30,7 @@ getVectorMax( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 
 template< typename Device >
@@ -47,7 +47,7 @@ getVectorMin( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 
 template< typename Device >
@@ -64,7 +64,7 @@ getVectorAbsMax( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 
 template< typename Device >
@@ -81,7 +81,7 @@ getVectorAbsMin( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 
 template< typename Device >
@@ -97,7 +97,7 @@ getVectorL1Norm( const Vector& v )
 
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 
 template< typename Device >
@@ -113,7 +113,7 @@ getVectorL2Norm( const Vector& v )
 
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; };
-   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) );
+   return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) );
 }
 
 template< typename Device >
@@ -136,7 +136,7 @@ getVectorLpNorm( const Vector& v,
 
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); };
-   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p );
+   return std::pow( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p );
 }
 
 template< typename Device >
@@ -155,7 +155,7 @@ getVectorSum( const Vector& v )
 
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i )  -> ResultType { return data[ i ]; };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 
 template< typename Device >
@@ -175,7 +175,7 @@ getVectorDifferenceMax( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 
 template< typename Device >
@@ -195,7 +195,7 @@ getVectorDifferenceMin( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 
 template< typename Device >
@@ -215,7 +215,7 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 
 template< typename Device >
@@ -235,7 +235,7 @@ getVectorDifferenceAbsMin( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 
 template< typename Device >
@@ -254,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1,
    const auto* data1 = v1.getData();
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 
 template< typename Device >
@@ -276,7 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1,
       auto diff = data1[ i ] - data2[ i ];
       return diff * diff;
    };
-   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) );
+   return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) );
 }
 
 template< typename Device >
@@ -302,7 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1,
    const auto* data1 = v1.getData();
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); };
-   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p );
+   return std::pow( Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p );
 }
 
 template< typename Device >
@@ -321,7 +321,7 @@ getVectorDifferenceSum( const Vector1& v1,
    const auto* data1 = v1.getData();
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 
 template< typename Device >
@@ -340,7 +340,7 @@ getScalarProduct( const Vector1& v1,
    const auto* data1 = v1.getData();
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/BLAS/VectorOperations.h b/src/Benchmarks/BLAS/VectorOperations.h
index 0ad2c1ee6829aefbc184a1725afb75e665110724..4c9ad6cc5778cc6ff1bbfd873165d5add56ed17b 100644
--- a/src/Benchmarks/BLAS/VectorOperations.h
+++ b/src/Benchmarks/BLAS/VectorOperations.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Benchmarks {
@@ -104,9 +104,9 @@ struct VectorOperations< Devices::Cuda >
       auto add2 = [=] __cuda_callable__ ( IndexType i ) { y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; };
 
       if( thisMultiplicator == 1.0 )
-         ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add1 );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add1 );
       else
-         ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add2 );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add2 );
    }
 
    template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 >
@@ -131,9 +131,9 @@ struct VectorOperations< Devices::Cuda >
       auto add2 = [=] __cuda_callable__ ( IndexType i ) { v[ i ] = thisMultiplicator * v[ i ] + multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; };
 
       if( thisMultiplicator == 1.0 )
-         ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add1 );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add1 );
       else
-         ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add2 );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add2 );
    }
 };
 
diff --git a/src/Benchmarks/BLAS/array-operations.h b/src/Benchmarks/BLAS/array-operations.h
index cff60c8cca5549dc7c0341f94a9d0d137d5dfb0b..84767a7b1f97e96387c25e6e52356259c67505d2 100644
--- a/src/Benchmarks/BLAS/array-operations.h
+++ b/src/Benchmarks/BLAS/array-operations.h
@@ -12,6 +12,8 @@
 
 #pragma once
 
+#include <cstring>
+
 #include "../Benchmarks.h"
 
 #include <TNL/Containers/Array.h>
@@ -66,6 +68,36 @@ benchmarkArrayOperations( Benchmark & benchmark,
    reset12();
 
 
+   if( std::is_fundamental< Real >::value ) {
+      // std::memcmp
+      auto compareHost = [&]() {
+         if( std::memcmp( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ) == 0 )
+            resultHost = true;
+         else
+            resultHost = false;
+      };
+      benchmark.setOperation( "comparison (memcmp)", 2 * datasetSize );
+      benchmark.time< Devices::Host >( reset12, "CPU", compareHost );
+
+      // std::memcpy and cudaMemcpy
+      auto copyHost = [&]() {
+         std::memcpy( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) );
+      };
+      benchmark.setOperation( "copy (memcpy)", 2 * datasetSize );
+      benchmark.time< Devices::Host >( reset12, "CPU", copyHost );
+#ifdef HAVE_CUDA
+      auto copyCuda = [&]() {
+         cudaMemcpy( deviceArray.getData(),
+                     deviceArray2.getData(),
+                     deviceArray.getSize() * sizeof(Real),
+                     cudaMemcpyDeviceToDevice );
+         TNL_CHECK_CUDA_DEVICE;
+      };
+      benchmark.time< Devices::Cuda >( reset12, "GPU", copyCuda );
+#endif
+   }
+
+
    auto compareHost = [&]() {
       resultHost = (int) ( hostArray == hostArray2 );
    };
diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
index b6c91a2470648de91edf58f42cff912e1e2b32bc..d515d52d73d513d87b86d4b743d8b0e27b20e0ca 100644
--- a/src/Benchmarks/BLAS/spmv.h
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -53,7 +53,7 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix,
                                          const int elementsPerRow,
                                          const int gridIdx )
 {
-   const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( rowIdx >= matrix->getRows() )
       return;
    int col = rowIdx - elementsPerRow / 2;
@@ -73,12 +73,12 @@ void setCudaTestMatrix( Matrix& matrix,
    typedef typename Matrix::IndexType IndexType;
    typedef typename Matrix::RealType RealType;
    Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
    const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
       if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
       setCudaTestMatrixKernel< Matrix >
          <<< cudaGridSize, cudaBlockSize >>>
          ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
@@ -109,7 +109,7 @@ benchmarkSpMV( Benchmark & benchmark,
    CudaVector deviceVector, deviceVector2;
 
    // create benchmark group
-   const std::vector< String > parsedType = parseObjectType( HostMatrix::getType() );
+   const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() );
 #ifdef HAVE_CUDA
    benchmark.createHorizontalGroup( parsedType[ 0 ], 2 );
 #else
diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index b510c383749c0104c3c87c8c111237dfdc5a35b9..a1bd3e92b0d2e11ac3f5377ffcf199d8a873ad60 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -182,7 +182,7 @@ main( int argc, char* argv[] )
       runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow );
 
    if( ! benchmark.save( logFile ) ) {
-      std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
+      std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
       return EXIT_FAILURE;
    }
 
diff --git a/src/Benchmarks/BLAS/triad.h b/src/Benchmarks/BLAS/triad.h
index c107944c893f3d799eabdfbaca5a3d32fd8a599a..3ac747fba5f386654a9558646868b8fb13671690 100644
--- a/src/Benchmarks/BLAS/triad.h
+++ b/src/Benchmarks/BLAS/triad.h
@@ -73,7 +73,7 @@ benchmarkTriad( Benchmark & benchmark,
          {
             a_v[i] = b_v[i] + scalar * c_v[i];
          };
-         ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
 
          a_h = a_d;
       };
@@ -117,7 +117,7 @@ benchmarkTriad( Benchmark & benchmark,
          {
             a_v[i] = b_v[i] + scalar * c_v[i];
          };
-         ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
 
          a_h = a_d;
       };
@@ -150,7 +150,7 @@ benchmarkTriad( Benchmark & benchmark,
       };
       auto triad = [&]()
       {
-         ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
       };
 
       benchmark.time< Devices::Cuda >( reset, "zero-copy", triad );
@@ -181,7 +181,7 @@ benchmarkTriad( Benchmark & benchmark,
       };
       auto triad = [&]()
       {
-         ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
+         Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel );
       };
 
       benchmark.time< Devices::Cuda >( reset, "unified memory", triad );
diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h
index ce2114f313fa75e91ac15d02bb4d3bfdf78aef9a..7254ba9f4075c81f7100e4c6c86bd16c3b9077a7 100644
--- a/src/Benchmarks/BLAS/vector-operations.h
+++ b/src/Benchmarks/BLAS/vector-operations.h
@@ -562,31 +562,31 @@ benchmarkVectorOperations( Benchmark & benchmark,
 #endif
 
    ////
-   // Inclusive prefix sum
-   auto inclusivePrefixSumHost = [&]() {
-      hostVector.prefixSum();
+   // Inclusive scan
+   auto inclusiveScanHost = [&]() {
+      hostVector.scan();
    };
-   benchmark.setOperation( "inclusive prefix sum", 2 * datasetSize );
-   benchmark.time< Devices::Host >( reset1, "CPU ET", inclusivePrefixSumHost );
+   benchmark.setOperation( "inclusive scan", 2 * datasetSize );
+   benchmark.time< Devices::Host >( reset1, "CPU ET", inclusiveScanHost );
 #ifdef HAVE_CUDA
-   auto inclusivePrefixSumCuda = [&]() {
-      deviceVector.prefixSum();
+   auto inclusiveScanCuda = [&]() {
+      deviceVector.scan();
    };
-   benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusivePrefixSumCuda );
+   benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusiveScanCuda );
 #endif
 
    ////
-   // Exclusive prefix sum
-   auto exclusivePrefixSumHost = [&]() {
-      hostVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >();
+   // Exclusive scan
+   auto exclusiveScanHost = [&]() {
+      hostVector.template scan< Algorithms::ScanType::Exclusive >();
    };
-   benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize );
-   benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost );
+   benchmark.setOperation( "exclusive scan", 2 * datasetSize );
+   benchmark.time< Devices::Host >( reset1, "CPU ET", exclusiveScanHost );
 #ifdef HAVE_CUDA
-   auto exclusivePrefixSumCuda = [&]() {
-      deviceVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >();
+   auto exclusiveScanCuda = [&]() {
+      deviceVector.template scan< Algorithms::ScanType::Exclusive >();
    };
-   benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda );
+   benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusiveScanCuda );
 #endif
 
 #ifdef HAVE_CUDA
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 683a18376276c4b0dbd194329226e9a517a4af12..4caf0fbda397f92d8cb7c143a12896b89600beb0 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -23,8 +23,8 @@
 #include <TNL/String.h>
 
 #include <TNL/Devices/Host.h>
-#include <TNL/Devices/SystemInfo.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
+#include <TNL/SystemInfo.h>
+#include <TNL/Cuda/DeviceInfo.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Communicators/MpiCommunicator.h>
 
@@ -330,25 +330,25 @@ protected:
 };
 
 
-Benchmark::MetadataMap getHardwareMetadata()
+inline Benchmark::MetadataMap getHardwareMetadata()
 {
    const int cpu_id = 0;
-   Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
+   const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id );
    String cacheInfo = convertToString( cacheSizes.L1data ) + ", "
                        + convertToString( cacheSizes.L1instruction ) + ", "
                        + convertToString( cacheSizes.L2 ) + ", "
                        + convertToString( cacheSizes.L3 );
 #ifdef HAVE_CUDA
-   const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice();
-   const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
-                             convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) );
+   const int activeGPU = Cuda::DeviceInfo::getActiveDevice();
+   const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
+                             convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) );
 #endif
    Benchmark::MetadataMap metadata {
-       { "host name", Devices::SystemInfo::getHostname() },
-       { "architecture", Devices::SystemInfo::getArchitecture() },
-       { "system", Devices::SystemInfo::getSystemName() },
-       { "system release", Devices::SystemInfo::getSystemRelease() },
-       { "start time", Devices::SystemInfo::getCurrentTime() },
+       { "host name", SystemInfo::getHostname() },
+       { "architecture", SystemInfo::getArchitecture() },
+       { "system", SystemInfo::getSystemName() },
+       { "system release", SystemInfo::getSystemRelease() },
+       { "start time", SystemInfo::getCurrentTime() },
 #ifdef HAVE_MPI
        { "number of MPI processes", convertToString( (Communicators::MpiCommunicator::IsInitialized())
                                        ? Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup )
@@ -356,19 +356,19 @@ Benchmark::MetadataMap getHardwareMetadata()
 #endif
        { "OpenMP enabled", convertToString( Devices::Host::isOMPEnabled() ) },
        { "OpenMP threads", convertToString( Devices::Host::getMaxThreadsCount() ) },
-       { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) },
-       { "CPU cores", convertToString( Devices::SystemInfo::getNumberOfCores( cpu_id ) ) },
-       { "CPU threads per core", convertToString( Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) ) },
-       { "CPU max frequency (MHz)", convertToString( Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) },
+       { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) },
+       { "CPU cores", convertToString( SystemInfo::getNumberOfCores( cpu_id ) ) },
+       { "CPU threads per core", convertToString( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) },
+       { "CPU max frequency (MHz)", convertToString( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) },
        { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
 #ifdef HAVE_CUDA
-       { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) },
+       { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) },
        { "GPU architecture", deviceArch },
-       { "GPU CUDA cores", convertToString( Devices::CudaDeviceInfo::getCudaCores( activeGPU ) ) },
-       { "GPU clock rate (MHz)", convertToString( (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 ) },
-       { "GPU global memory (GB)", convertToString( (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) },
-       { "GPU memory clock rate (MHz)", convertToString( (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) },
-       { "GPU memory ECC enabled", convertToString( Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) ) },
+       { "GPU CUDA cores", convertToString( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) },
+       { "GPU clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) },
+       { "GPU global memory (GB)", convertToString( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) },
+       { "GPU memory clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) },
+       { "GPU memory ECC enabled", convertToString( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) },
 #endif
    };
 
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index 81e5d3a6d05aff53fa56e6d6eb045104f9ac3c42..aa4b29424d2b93b323017e5501231a57874ccfa4 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -73,8 +73,8 @@ benchmarkSpmvCuda( Benchmark& benchmark,
 {
    using RealType = typename Matrix::RealType;
    using IndexType = typename Matrix::IndexType;
-   using CudaMatrix = typename Matrix::CudaType;
-   using CudaVector = typename Vector::CudaType;
+   using CudaMatrix = typename Matrix::template Self< RealType, Devices::Cuda >;
+   using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >;
 
    CudaVector cuda_x;
    cuda_x = x;
@@ -125,8 +125,8 @@ benchmarkDistributedSpmvCuda( Benchmark& benchmark,
 {
    using RealType = typename Matrix::RealType;
    using IndexType = typename Matrix::IndexType;
-   using CudaMatrix = typename Matrix::CudaType;
-   using CudaVector = typename Vector::CudaType;
+   using CudaMatrix = typename Matrix::template Self< RealType, Devices::Cuda >;
+   using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >;
 
    CudaVector cuda_x;
    cuda_x = x;
diff --git a/src/Benchmarks/HeatEquation/BenchmarkLaplace.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h
index 0a5494e2b157bbfa6ae5164b579d17e1f6aab43b..0c2fd92e309b53964a017b498dfd76ddf595e0bf 100644
--- a/src/Benchmarks/HeatEquation/BenchmarkLaplace.h
+++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h
@@ -33,8 +33,6 @@ class BenchmarkLaplace< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, In
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimension = MeshType::getMeshDimension() };
 
-      static String getType();
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
@@ -81,8 +79,6 @@ class BenchmarkLaplace< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, Ind
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimension = MeshType::getMeshDimension() };
 
-      static String getType();
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
@@ -144,8 +140,6 @@ class BenchmarkLaplace< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Ind
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimension = MeshType::getMeshDimension() };
 
-      static String getType();
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
diff --git a/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h
index 34a2e245aad4fcc17c662cf9b72b009df9a7dcfb..47a67744153d970d12d9aee7fe478a16c3141d0d 100644
--- a/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h
+++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h
@@ -4,21 +4,6 @@
 /****
  * 1D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-BenchmarkLaplace< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "BenchmarkLaplace< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -108,21 +93,6 @@ setMatrixElements( const RealType& time,
 /****
  * 2D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-BenchmarkLaplace< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "BenchmarkLaplace< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -288,21 +258,6 @@ setMatrixElements( const RealType& time,
 /****
  * 3D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-BenchmarkLaplace< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "BenchmarkLaplace< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h
index 998be646d6a296c272f69560ebe75ec507e98dd2..95491a1cfa1d5faf0557447834760fbb80c16340 100644
--- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h
+++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h
@@ -40,8 +40,6 @@ class HeatEquationBenchmarkProblem:
       using typename BaseType::DofVectorPointer;
 
       HeatEquationBenchmarkProblem();
-      
-      static String getType();
 
       String getPrologHeader() const;
 
diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h
index 14f1fd8a9e0a38a6aa335aacdbe886a155225811..3f0c9194867d011724e2387c882420e04e798ee6 100644
--- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h
+++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h
@@ -16,18 +16,6 @@
 
 
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename DifferentialOperator,
-          typename Communicator >
-String
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
-getType()
-{
-   return String( "HeatEquationBenchmarkProblem< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
@@ -94,9 +82,9 @@ setup( const Config::ParameterContainer& parameters,
 
    if( std::is_same< DeviceType, Devices::Cuda >::value )
    {
-      this->cudaBoundaryConditions = Devices::Cuda::passToDevice( *this->boundaryConditionPointer );
-      this->cudaRightHandSide = Devices::Cuda::passToDevice( *this->rightHandSidePointer );
-      this->cudaDifferentialOperator = Devices::Cuda::passToDevice( *this->differentialOperatorPointer );
+      this->cudaBoundaryConditions = Cuda::passToDevice( *this->boundaryConditionPointer );
+      this->cudaRightHandSide = Cuda::passToDevice( *this->rightHandSidePointer );
+      this->cudaDifferentialOperator = Cuda::passToDevice( *this->differentialOperatorPointer );
    }
    this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer );
    this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer );
@@ -278,8 +266,8 @@ boundaryConditionsTemplatedCompact( const GridType* grid,
 {
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = begin.x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   coordinates.y() = begin.y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;        
+   coordinates.x() = begin.x() + ( gridXIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   coordinates.y() = begin.y() + ( gridYIdx * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;        
 
    if( coordinates.x() < end.x() &&
        coordinates.y() < end.y() )
@@ -369,8 +357,8 @@ heatEquationTemplatedCompact( const GridType* grid,
    typedef typename GridType::IndexType IndexType;
    typedef typename GridType::RealType RealType;
 
-   coordinates.x() = begin.x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   coordinates.y() = begin.y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;     
+   coordinates.x() = begin.x() + ( gridXIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   coordinates.y() = begin.y() + ( gridYIdx * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;     
       
    MeshFunction& u = *_u;
    MeshFunction& fu = *_fu;
@@ -495,14 +483,14 @@ getExplicitUpdate( const RealType& time,
          CellType cell( mesh.template getData< DeviceType >() );
          dim3 cudaBlockSize( 16, 16 );
          dim3 cudaBlocks;
-         cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
-         cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y );
-         const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
-         const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y );
+         cudaBlocks.x = Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
+         cudaBlocks.y = Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y );
+         const IndexType cudaXGrids = Cuda::getNumberOfGrids( cudaBlocks.x );
+         const IndexType cudaYGrids = Cuda::getNumberOfGrids( cudaBlocks.y );
          
          //std::cerr << "Setting boundary conditions..." << std::endl;
 
-         Devices::Cuda::synchronizeDevice();
+         Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
          for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
             for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
                boundaryConditionsTemplatedCompact< MeshType, CellType, BoundaryCondition, MeshFunctionType >
@@ -606,7 +594,7 @@ getExplicitUpdate( const RealType& time,
                                gridYSize / 16 + ( gridYSize % 16 != 0 ) );
             */
 
-            TNL::Devices::Cuda::synchronizeDevice();
+            Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
             int cudaErr;
             Meshes::Traverser< MeshType, Cell > meshTraverser;
             meshTraverser.template processInteriorEntities< UserData,
@@ -774,10 +762,10 @@ template< typename Mesh,
 HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 ~HeatEquationBenchmarkProblem()
 {
-   if( this->cudaMesh ) Devices::Cuda::freeFromDevice( this->cudaMesh );
-   if( this->cudaBoundaryConditions )  Devices::Cuda::freeFromDevice( this->cudaBoundaryConditions );
-   if( this->cudaRightHandSide ) Devices::Cuda::freeFromDevice( this->cudaRightHandSide );
-   if( this->cudaDifferentialOperator ) Devices::Cuda::freeFromDevice( this->cudaDifferentialOperator );
+   if( this->cudaMesh ) Cuda::freeFromDevice( this->cudaMesh );
+   if( this->cudaBoundaryConditions )  Cuda::freeFromDevice( this->cudaBoundaryConditions );
+   if( this->cudaRightHandSide ) Cuda::freeFromDevice( this->cudaRightHandSide );
+   if( this->cudaDifferentialOperator ) Cuda::freeFromDevice( this->cudaDifferentialOperator );
 }
 
 
diff --git a/src/Benchmarks/HeatEquation/TestGridEntity.h b/src/Benchmarks/HeatEquation/TestGridEntity.h
index 3492b219807f4650ed665b2ee57c77754f5934f1..5be39bac1c342c9445e91c99bc868cd917935a16 100644
--- a/src/Benchmarks/HeatEquation/TestGridEntity.h
+++ b/src/Benchmarks/HeatEquation/TestGridEntity.h
@@ -78,8 +78,7 @@ class TestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension
       
       typedef Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
       typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
-      typedef TestGridEntity< GridType, entityDimension > ThisType;
-      typedef TestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType;
+      typedef TestNeighborGridEntitiesStorage< TestGridEntity > NeighborGridEntitiesStorageType;
       
       __cuda_callable__ inline
       TestGridEntity( const GridType& grid )
diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h
index cdbc4922ca07eda7da0ba442340705f6646d8430..7e7e5369182ebc579ab98da55bbacef872284edf 100644
--- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h
+++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/CudaStreamPool.h>
+#include <TNL/Cuda/StreamPool.h>
 
 namespace TNL {
 
diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h
index f3d9fbeec528dae97e4f3304f44b8440318d4529..c9fe0e43be20b175321dc1b50c563fb48e843b5d 100644
--- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h
+++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h
@@ -8,8 +8,6 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Devices/MIC.h>
-
 #pragma once
 
 #include "GridTraverser.h"
@@ -128,8 +126,8 @@ _GridTraverser2D(
    typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
-   coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
+   coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx );
    
    if( coordinates <= end )
    {
@@ -175,7 +173,7 @@ _GridTraverser2DBoundary(
    Index entitiesAlongX = endX - beginX + 1;
    Index entitiesAlongY = endY - beginY;
    
-   Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   Index threadId = Cuda::getGlobalThreadIdx_x( gridIdx );
    if( threadId < entitiesAlongX )
    {
       GridEntity entity( *grid, 
@@ -246,12 +244,12 @@ processEntities(
       dim3 cudaBlockSize( 256 );      
       dim3 cudaBlocksCount, cudaGridsCount;
       IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 );
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
       dim3 gridIdx, cudaGridSize;
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ )
       {
-         Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+         Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
          _GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
@@ -268,20 +266,20 @@ processEntities(
    {
       dim3 cudaBlockSize( 16, 16 );
       dim3 cudaBlocksCount, cudaGridsCount;
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
-                                   end.x() - begin.x() + 1,
-                                   end.y() - begin.y() + 1 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
+                          end.x() - begin.x() + 1,
+                          end.y() - begin.y() + 1 );
       
-      auto& pool = CudaStreamPool::getInstance();
+      auto& pool = Cuda::StreamPool::getInstance();
       const cudaStream_t& s = pool.getStream( stream );
 
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       dim3 gridIdx, cudaGridSize;
       for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ )
          for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ )
          {
-            Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
-	    //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
+            Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+	    //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
             TNL::_GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
diff --git a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h
index 67254ab3607c9c318f8d6a624387c5d875ee2484..59de340f2d7b23f1792aa0c48015df66bf405679 100644
--- a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h
+++ b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h
@@ -26,8 +26,7 @@ class SimpleCell
       typedef typename GridType::IndexType IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       typedef typename GridType::PointType PointType;
-      typedef SimpleCell< GridType, Config > ThisType;
-      typedef Meshes::NeighborGridEntitiesStorage< ThisType, Config >
+      typedef Meshes::NeighborGridEntitiesStorage< SimpleCell, Config >
          NeighborGridEntitiesStorageType;
       typedef Config ConfigType;
       
diff --git a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h
index 084d6cc39a6a755b5ead53d791d22d2c327b37b9..a7a6fe39e4a6cc1ce0f584c71813bfbe070feaad 100644
--- a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h
+++ b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h
@@ -52,9 +52,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject
    typedef Index IndexType;
    typedef Containers::StaticVector< 2, Real > PointType;
    typedef Containers::StaticVector< 2, Index > CoordinatesType;
-   typedef Meshes::Grid< 2, Real, Devices::Host, Index > HostType;
-   typedef Meshes::Grid< 2, Real, tnlCuda, Index > CudaType;   
-   typedef Meshes::Grid< 2, Real, Device, Index > ThisType;
    
    static const int meshDimension = 2;
 
@@ -78,10 +75,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject
 
    Grid();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    static String getSerializationType();
 
    virtual String getSerializationTypeVirtual() const;
@@ -212,29 +205,13 @@ Meshes::Grid< 2, Real, Device, Index > :: Grid()
 template< typename Real,
           typename Device,
           typename Index >
-String Meshes::Grid< 2, Real, Device, Index > :: getType()
+String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType()
 {
    return String( "Meshes::Grid< " ) +
           convertToString( getMeshDimension() ) + ", " +
-          String( ::getType< RealType >() ) + ", " +
-          String( Device :: getDeviceType() ) + ", " +
-          String( ::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-           typename Device,
-           typename Index >
-String Meshes::Grid< 2, Real, Device, Index > :: getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType()
-{
-   return HostType::getType();
+          getType< RealType >() + ", " +
+          getType< Devices::Host >() + ", " +
+          getType< IndexType >() + " >";
 };
 
 template< typename Real,
@@ -828,9 +805,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject
    typedef Index IndexType;
    typedef Containers::StaticVector< 2, Real > PointType;
    typedef Containers::StaticVector< 2, Index > CoordinatesType;
-   typedef Meshes::Grid< 2, Real, Devices::Host, Index > HostType;
-   typedef Meshes::Grid< 2, Real, tnlCuda, Index > CudaType;   
-   typedef Meshes::Grid< 2, Real, Device, Index > ThisType;
    
    static const int meshDimension = 2;
 
@@ -854,10 +828,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject
 
    Grid();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    static String getSerializationType();
 
    virtual String getSerializationTypeVirtual() const;
@@ -977,29 +947,13 @@ Meshes::Grid< 2, Real, Device, Index > :: Grid()
 template< typename Real,
           typename Device,
           typename Index >
-String Meshes::Grid< 2, Real, Device, Index > :: getType()
+String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType()
 {
    return String( "Meshes::Grid< " ) +
           convertToString( getMeshDimension() ) + ", " +
-          String( ::getType< RealType >() ) + ", " +
-          String( Device :: getDeviceType() ) + ", " +
-          String( ::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-           typename Device,
-           typename Index >
-String Meshes::Grid< 2, Real, Device, Index > :: getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType()
-{
-   return HostType::getType();
+          getType< RealType >() + ", " +
+          getType< Devices::Host >() + ", " +
+          getType< IndexType >() + " >";
 };
 
 template< typename Real,
diff --git a/src/Benchmarks/HeatEquation/tnlTestGridEntity.h b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h
index aa8bd8d057309b1cd48fe38d71eab7886ccc0d7c..4401e1e7207f39f51d489de8f16375ba438f97ad 100644
--- a/src/Benchmarks/HeatEquation/tnlTestGridEntity.h
+++ b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h
@@ -55,8 +55,6 @@ class tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimensi
       
       typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
       typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
-      typedef tnlTestGridEntity< GridType, entityDimension, Config > ThisType;
-      //typedef tnlTestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType;
       
       /*template< int NeighborEntityDimension = entityDimension >
       using NeighborEntities = 
diff --git a/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h
index 13c7848decb4f967dd04630a03e62bbb49249efa..a6434a01394e4ebfeb4d296f9640a563be967310 100644
--- a/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h
+++ b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h
@@ -93,7 +93,6 @@ class tnlTestNeighborGridEntityGetter<
       typedef typename GridType::CoordinatesType CoordinatesType;
       typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetter;
       typedef GridEntityStencilStorageTag< GridEntityCrossStencil > StencilStorage;
-      typedef tnlTestNeighborGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType;
       
       
       static const int stencilSize = Config::getStencilSize();
@@ -110,7 +109,7 @@ class tnlTestNeighborGridEntityGetter<
          public:
             
             __cuda_callable__
-            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
+            static void exec( tnlTestNeighborGridEntityGetter& neighborEntityGetter, const IndexType& entityIndex )
             {
                neighborEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
             }
@@ -122,7 +121,7 @@ class tnlTestNeighborGridEntityGetter<
          public:
             
             __cuda_callable__
-            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
+            static void exec( tnlTestNeighborGridEntityGetter& neighborEntityGetter, const IndexType& entityIndex )
             {
                neighborEntityGetter.stencilY[ index + stencilSize ] = 
                   entityIndex + index * neighborEntityGetter.entity.getMesh().getDimensions().x();
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 6661c5f6a8e720df9a265ca5cb3c5b97717294d5..0701b647a42416e6439cd02a1ef10157512210f3 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -119,8 +119,8 @@ benchmarkIterativeSolvers( Benchmark& benchmark,
                            const Vector& b )
 {
 #ifdef HAVE_CUDA
-   using CudaMatrix = typename Matrix::CudaType;
-   using CudaVector = typename Vector::CudaType;
+   using CudaMatrix = typename Matrix::template Self< typename Matrix::RealType, Devices::Cuda >;
+   using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >;
 
    CudaVector cuda_x0, cuda_b;
    cuda_x0 = x0;
@@ -130,7 +130,7 @@ benchmarkIterativeSolvers( Benchmark& benchmark,
    *cudaMatrixPointer = *matrixPointer;
 
    // synchronize shared pointers
-   Devices::Cuda::synchronizeDevice();
+   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
 #endif
 
    using namespace Solvers::Linear;
@@ -461,9 +461,11 @@ struct LinearSolversBenchmark
          SharedPointer< CSR > matrixCopy;
          Matrices::copySparseMatrix( *matrixCopy, *matrixPointer );
 
-         SharedPointer< typename CSR::CudaType > cuda_matrixCopy;
+         using CudaCSR = Matrices::CSR< RealType, Devices::Cuda, IndexType >;
+         using CudaVector = typename VectorType::template Self< RealType, Devices::Cuda >;
+         SharedPointer< CudaCSR > cuda_matrixCopy;
          *cuda_matrixCopy = *matrixCopy;
-         typename VectorType::CudaType cuda_x0, cuda_b;
+         CudaVector cuda_x0, cuda_b;
          cuda_x0.setLike( x0 );
          cuda_b.setLike( b );
          cuda_x0 = x0;
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
index b1fdd8c719077d0eff230081d9b62b92da3c76b4..285dd6f3d7f97aa9f90a0914ca88fe535661d7eb 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
@@ -52,7 +52,8 @@ template< typename Array >
 void expect_eq( Array& a, Array& b )
 {
    if( std::is_same< typename Array::DeviceType, TNL::Devices::Cuda >::value ) {
-      typename Array::HostType a_host, b_host;
+      using HostArray = typename Array::template Self< typename Array::ValueType, TNL::Devices::Host >;
+      HostArray a_host, b_host;
       a_host = a;
       b_host = b;
       expect_eq_chunked( a_host, b_host );
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
index 0de53ea8815033654194cc9e2eb6f3eaf6356356..0c29b21b5894e46627f8c4f129eafc8697ae9aec 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
@@ -14,7 +14,7 @@
 
 #include <TNL/Assert.h>
 #include <TNL/Math.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 #include <TNL/Containers/NDArray.h>
 #include <TNL/Containers/ndarray/Operations.h>
@@ -54,7 +54,8 @@ template< typename Array >
 void expect_eq( Array& a, Array& b )
 {
    if( std::is_same< typename Array::DeviceType, TNL::Devices::Cuda >::value ) {
-      typename Array::HostType a_host, b_host;
+      using HostArray = typename Array::template Self< typename Array::ValueType, TNL::Devices::Host >;
+      HostArray a_host, b_host;
       a_host = a;
       b_host = b;
       expect_eq_chunked( a_host, b_host );
@@ -98,7 +99,7 @@ void benchmark_array( Benchmark& benchmark, index_type size = 500000000 )
    };
 
    auto f = [&]() {
-      TNL::ParallelFor< Device >::exec( 0, (int) size, kernel, a.getData(), b.getData() );
+      Algorithms::ParallelFor< Device >::exec( 0, (int) size, kernel, a.getData(), b.getData() );
    };
 
    // warm-up for all benchmarks
diff --git a/src/Benchmarks/ODESolvers/Euler.h b/src/Benchmarks/ODESolvers/Euler.h
index c767eb33ec7b09bab4b53fe5ec54cbf981f73588..2df469d6f2e960cda1844dac35c05ed070c402ce 100644
--- a/src/Benchmarks/ODESolvers/Euler.h
+++ b/src/Benchmarks/ODESolvers/Euler.h
@@ -38,8 +38,6 @@ class Euler : public Solvers::ODE::ExplicitSolver< Problem, SolverMonitor >
 
    Euler();
 
-   static String getType();
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
 
diff --git a/src/Benchmarks/ODESolvers/Euler.hpp b/src/Benchmarks/ODESolvers/Euler.hpp
index 1066e178c2f150c97514eb04dcd19a5a30932102..ab975ed078c470f4824d18e7848033e6fed73f2c 100644
--- a/src/Benchmarks/ODESolvers/Euler.hpp
+++ b/src/Benchmarks/ODESolvers/Euler.hpp
@@ -10,7 +10,6 @@
 
 #pragma once
 
-#include <TNL/Devices/MIC.h>
 #include <TNL/Communicators/MpiCommunicator.h>
 #include <TNL/Communicators/NoDistrCommunicator.h>
 #include "ComputeBlockResidue.h"
@@ -33,14 +32,6 @@ Euler< Problem, SolverMonitor >::Euler()
 {
 };
 
-template< typename Problem, typename SolverMonitor >
-String Euler< Problem, SolverMonitor >::getType()
-{
-   return String( "Euler< " ) +
-          Problem :: getType() +
-          String( " >" );
-};
-
 template< typename Problem, typename SolverMonitor >
 void Euler< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config,
                                                const String& prefix )
@@ -185,10 +176,10 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u,
    {
 #ifdef HAVE_CUDA
       dim3 cudaBlockSize( 512 );
-      const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
-      const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks );
-      this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) );
-      const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x;
+      const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
+      const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks );
+      this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) );
+      const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x;
 
       localResidue = 0.0;
       for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ )
@@ -196,7 +187,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u,
          const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType );
          const IndexType gridOffset = gridIdx * threadsPerGrid;
          const IndexType currentSize = min( size - gridOffset, threadsPerGrid );
-         const IndexType currentGridSize = Devices::Cuda::getNumberOfBlocks( currentSize, cudaBlockSize.x );
+         const IndexType currentGridSize = Cuda::getNumberOfBlocks( currentSize, cudaBlockSize.x );
 
          updateUEuler<<< currentGridSize, cudaBlockSize, sharedMemory >>>( currentSize,
                                                                       tau,
@@ -209,28 +200,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u,
       }
 #endif
    }
-   
-   //MIC
-   if( std::is_same< DeviceType, Devices::MIC >::value )
-   {
 
-#ifdef HAVE_MIC
-      Devices::MICHider<RealType> mu;
-      mu.pointer=_u;
-      Devices::MICHider<RealType> mk1;
-      mk1.pointer=_k1;
-    #pragma offload target(mic) in(mu,mk1,size) inout(localResidue)
-    {
-      #pragma omp parallel for reduction(+:localResidue) firstprivate( mu, mk1 )  
-      for( IndexType i = 0; i < size; i ++ )
-      {
-         const RealType add = tau * mk1.pointer[ i ];
-         mu.pointer[ i ] += add;
-         localResidue += std::fabs( add );
-      }
-    }
-#endif
-   }
    localResidue /= tau * ( RealType ) size;
    Problem::CommunicatorType::Allreduce( &localResidue, &currentResidue, 1, MPI_SUM, Problem::CommunicatorType::AllGroup );
    //std::cerr << "Local residue = " << localResidue << " - globalResidue = " << currentResidue << std::endl;
diff --git a/src/Benchmarks/ODESolvers/Merson.h b/src/Benchmarks/ODESolvers/Merson.h
index 8d00667c2aba372525bca19462c49a5e430e3e79..74e052705db27b18ec6e0b61a7fb8e1c863c4032 100644
--- a/src/Benchmarks/ODESolvers/Merson.h
+++ b/src/Benchmarks/ODESolvers/Merson.h
@@ -35,8 +35,6 @@ class Merson : public Solvers::ODE::ExplicitSolver< Problem, SolverMonitor >
    
    Merson();
 
-   static String getType();
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
 
diff --git a/src/Benchmarks/ODESolvers/Merson.hpp b/src/Benchmarks/ODESolvers/Merson.hpp
index 3a5cdf660b82d6f48509172fb1eee3e3fadb5760..3c74bdf480c66025f44de201e18ad5368d050e6f 100644
--- a/src/Benchmarks/ODESolvers/Merson.hpp
+++ b/src/Benchmarks/ODESolvers/Merson.hpp
@@ -94,14 +94,6 @@ Merson< Problem, SolverMonitor >::Merson()
    }
 };
 
-template< typename Problem, typename SolverMonitor >
-String Merson< Problem, SolverMonitor >::getType()
-{
-   return String( "Merson< " ) +
-          Problem::getType() +
-          String( " >" );
-};
-
 template< typename Problem, typename SolverMonitor >
 void Merson< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config,
                                                 const String& prefix )
@@ -298,10 +290,10 @@ void Merson< Problem, SolverMonitor >::computeKFunctions( DofVectorPointer& u,
    {
 #ifdef HAVE_CUDA
       dim3 cudaBlockSize( 512 );
-      const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
-      const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks );
-      this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) );
-      const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x;
+      const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
+      const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks );
+      this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) );
+      const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x;
 
       this->problem->getExplicitUpdate( time, tau, u, k1 );
       cudaDeviceSynchronize();
@@ -392,10 +384,10 @@ typename Problem :: RealType Merson< Problem, SolverMonitor >::computeError( con
    {
 #ifdef HAVE_CUDA
       dim3 cudaBlockSize( 512 );
-      const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
-      const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks );
-      this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) );
-      const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x;
+      const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
+      const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks );
+      this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) );
+      const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x;
 
       for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ )
       {
@@ -447,10 +439,10 @@ void Merson< Problem, SolverMonitor >::computeNewTimeLevel( const RealType time,
    {
 #ifdef HAVE_CUDA
       dim3 cudaBlockSize( 512 );
-      const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
-      const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks );
-      this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) );
-      const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x;
+      const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x );
+      const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks );
+      this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) );
+      const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x;
 
       localResidue = 0.0;
       for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ )
diff --git a/src/Benchmarks/ODESolvers/SimpleProblem.h b/src/Benchmarks/ODESolvers/SimpleProblem.h
index 6323264b8e1dc88bedf1076fbeb05b8cb80adee6..ff81fd18e4576672a89f35f54ff37eeed4ba9d86 100644
--- a/src/Benchmarks/ODESolvers/SimpleProblem.h
+++ b/src/Benchmarks/ODESolvers/SimpleProblem.h
@@ -13,7 +13,7 @@
 #pragma once
 
 #include <TNL/Devices/Host.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
    namespace Benchmarks {
@@ -43,7 +43,7 @@ struct SimpleProblem
       {
          fu[ i ] = 1.0;
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, u.getSize(), computeF, u, fu );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, u.getSize(), computeF, u, fu );
    }
    
    template< typename Vector >
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
index 72ca102bcc131067eec286390e819be91db22e04..01590f1221f7a451270234044e180a16ff589e02 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h
index 8ec5cdf888b35185becbcda3841cc2cd46a9a176..9820af39274cd5c8db310b583c0595d2b64bf252 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h
index 3c2037f40b7e33da59d1af2a3b0552d49d06ebb2..0e9ae7f2f36a9d19a913a4437d988d5172592e5b 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h
index 9dfeadb056461623f12b51992c3efee9a8c8767e..26b6413e43edbc86a3a02490580afc834bb0bde6 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index 63b3cc8c94a58b1616cb77c4fa43348bc53e93e8..dbe637d826fe4e0dcd593320fba11ad46588e9b3 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -19,7 +19,7 @@
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Containers/List.h>
 
 using namespace TNL;
diff --git a/src/Examples/CMakeLists.txt b/src/Examples/CMakeLists.txt
index 4038095719828aed5da45f9b62da30ac120d74cd..493f537d11ef6b6c54f42d476aca4d08cedf17cb 100644
--- a/src/Examples/CMakeLists.txt
+++ b/src/Examples/CMakeLists.txt
@@ -12,6 +12,5 @@ add_subdirectory( flow-vl )
 
 
 ADD_EXECUTABLE( ConfigDescriptionExample ConfigDescriptionExample.cpp )
-ADD_EXECUTABLE( ListExample ListExample.cpp )
 ADD_EXECUTABLE( LoggerExample LoggerExample.cpp )
 ADD_EXECUTABLE( MathExample MathExample.cpp )
diff --git a/src/Examples/ListExample.cpp b/src/Examples/ListExample.cpp
deleted file mode 100644
index 7196dc7594689bca4395976f43b23c8b12d42eb5..0000000000000000000000000000000000000000
--- a/src/Examples/ListExample.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <iostream>
-#include <TNL/Config/ConfigDescription.h>
-#include <TNL/Containers/List.h>
-#include <TNL/Containers/Array.h>
-
-using namespace TNL;
-using namespace std;
-       
-int main()
-{
-    Containers::List< int > lst;
-    lst.isEmpty();
-
-    lst.Append(1);
-    lst.Append(3);
-
-    lst.isEmpty();
-    lst.getSize();
-
-    lst.Insert(2,1);
-
-    Containers::Array<int> array;
-    lst.toArray(array);
-}
\ No newline at end of file
diff --git a/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h
index 6231f6780e24e3090c83832c9d47534a9c6104a8..e02f1b1e403c25802c9e8792daa972bec1faf071 100644
--- a/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
diff --git a/src/Examples/flow-sw/DensityBoundaryConditionCavity.h b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h
index 18eaff1101eccc7733eb5978b48807be82bba916..008a68bef0b7a81519f95c184163b6a8f1752a07 100644
--- a/src/Examples/flow-sw/DensityBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
diff --git a/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h
index a99fdf0157bfcbca614374e8472ab9fe8a3b4f58..0090bc2452288f7af0b18fa7ce675cd252f4e423 100644
--- a/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h
@@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h
index 3b49cd56e5ab6901716c86115561c26fbbbff973..0730c9ee227b0ed57ad6f389c3f957db0b8d0872 100644
--- a/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h
@@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/LaxFridrichsContinuity.h b/src/Examples/flow-sw/LaxFridrichsContinuity.h
index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644
--- a/src/Examples/flow-sw/LaxFridrichsContinuity.h
+++ b/src/Examples/flow-sw/LaxFridrichsContinuity.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/flow-sw/LaxFridrichsEnergy.h b/src/Examples/flow-sw/LaxFridrichsEnergy.h
index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644
--- a/src/Examples/flow-sw/LaxFridrichsEnergy.h
+++ b/src/Examples/flow-sw/LaxFridrichsEnergy.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumX.h b/src/Examples/flow-sw/LaxFridrichsMomentumX.h
index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644
--- a/src/Examples/flow-sw/LaxFridrichsMomentumX.h
+++ b/src/Examples/flow-sw/LaxFridrichsMomentumX.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumY.h b/src/Examples/flow-sw/LaxFridrichsMomentumY.h
index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644
--- a/src/Examples/flow-sw/LaxFridrichsMomentumY.h
+++ b/src/Examples/flow-sw/LaxFridrichsMomentumY.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h
index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644
--- a/src/Examples/flow-sw/LaxFridrichsMomentumZ.h
+++ b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h
index dfe63e07623a6fdb6ca7ebb7da8ab445d9505372..6a921539c13c4b0958f01ffab8b357435c43c969 100644
--- a/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h
index 07abfdbeb940039555ac2799d0ef374ca26faff0..f27dda7f46c5766a848c94664e652b002c8dbe51 100644
--- a/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h
index 83b6282ddd50033f32a382f0b48f5abe7347ccaa..2a8e06f2f611d11f545665b2a53f6b8f7a3f3cae 100644
--- a/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h
index a83dd653f92328814b8d0746bc45c8775552a310..35c01409cb3188d02ee617a313267fbdd88b85ac 100644
--- a/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h
index 9d887857ce97f916dcfaccd0208138afc200afd1..cf790d77d0e2090cce60dc3309094ca5122b0318 100644
--- a/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h
index 5fe6f22e5945513c9e9e86d835256ef84e27c054..a771ab84f84d2c5cf22b3d57d7ee9f2a91ef82bf 100644
--- a/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-sw/UpwindContinuity.h b/src/Examples/flow-sw/UpwindContinuity.h
index fc599d3d9773c39752b72abcee6626150ea70c7a..d016cff6b5324de887e8eb80750343a357c69d6b 100644
--- a/src/Examples/flow-sw/UpwindContinuity.h
+++ b/src/Examples/flow-sw/UpwindContinuity.h
@@ -37,14 +37,6 @@ class UpwindContinuityBase
       typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
-      static String getType()
-      {
-         return String( "UpwindContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow-sw/UpwindEnergy.h b/src/Examples/flow-sw/UpwindEnergy.h
index 6c7e94ec87ce2e3fbd96e6affaeb91b1242d9246..8023631ba014e552d4a8353c1e9d44e27973c382 100644
--- a/src/Examples/flow-sw/UpwindEnergy.h
+++ b/src/Examples/flow-sw/UpwindEnergy.h
@@ -36,14 +36,6 @@ class UpwindEnergyBase
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "UpwindEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow-sw/UpwindMomentumX.h b/src/Examples/flow-sw/UpwindMomentumX.h
index edd3756208121de465185a84693a10671e587bac..939e4f555587adb7b5e5e79b81c95e7601e393de 100644
--- a/src/Examples/flow-sw/UpwindMomentumX.h
+++ b/src/Examples/flow-sw/UpwindMomentumX.h
@@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -136,14 +127,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -268,14 +251,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-sw/UpwindMomentumY.h b/src/Examples/flow-sw/UpwindMomentumY.h
index 4b5a7bcb26d049c2773790857d3f79246488b55b..7a4d3d050654d4db86a00e1e00e2e882088285fe 100644
--- a/src/Examples/flow-sw/UpwindMomentumY.h
+++ b/src/Examples/flow-sw/UpwindMomentumY.h
@@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -238,14 +221,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-sw/UpwindMomentumZ.h b/src/Examples/flow-sw/UpwindMomentumZ.h
index 887eec977517e5850db2085835d8242d63605c96..c425887578dfd313d18ed8567d86044ef4f568dc 100644
--- a/src/Examples/flow-sw/UpwindMomentumZ.h
+++ b/src/Examples/flow-sw/UpwindMomentumZ.h
@@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-sw/navierStokesProblem.h b/src/Examples/flow-sw/navierStokesProblem.h
index 0252a5c46dc392566cde6ad8d454d341d8331b05..0e79d19df771363816f60297d75e5c15da8938af 100644
--- a/src/Examples/flow-sw/navierStokesProblem.h
+++ b/src/Examples/flow-sw/navierStokesProblem.h
@@ -55,8 +55,6 @@ class navierStokesProblem:
       typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
       typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
-      
-      static String getTypeStatic();
 
       String getPrologHeader() const;
 
diff --git a/src/Examples/flow-sw/navierStokesProblem_impl.h b/src/Examples/flow-sw/navierStokesProblem_impl.h
index 886c9f03f4e981cd9533d72ba5f71809388c6438..96bdb48279af245451dd7c140c1675b71df87b05 100644
--- a/src/Examples/flow-sw/navierStokesProblem_impl.h
+++ b/src/Examples/flow-sw/navierStokesProblem_impl.h
@@ -30,18 +30,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename InviscidOperators,
-          typename Communicator >
-String
-navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >::
-getTypeStatic()
-{
-   return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h
index 6231f6780e24e3090c83832c9d47534a9c6104a8..e02f1b1e403c25802c9e8792daa972bec1faf071 100644
--- a/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
diff --git a/src/Examples/flow-vl/DensityBoundaryConditionCavity.h b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h
index 18eaff1101eccc7733eb5978b48807be82bba916..008a68bef0b7a81519f95c184163b6a8f1752a07 100644
--- a/src/Examples/flow-vl/DensityBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
diff --git a/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h
index a99fdf0157bfcbca614374e8472ab9fe8a3b4f58..0090bc2452288f7af0b18fa7ce675cd252f4e423 100644
--- a/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h
@@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h
index 3b49cd56e5ab6901716c86115561c26fbbbff973..0730c9ee227b0ed57ad6f389c3f957db0b8d0872 100644
--- a/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h
@@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/LaxFridrichsContinuity.h b/src/Examples/flow-vl/LaxFridrichsContinuity.h
index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644
--- a/src/Examples/flow-vl/LaxFridrichsContinuity.h
+++ b/src/Examples/flow-vl/LaxFridrichsContinuity.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/flow-vl/LaxFridrichsEnergy.h b/src/Examples/flow-vl/LaxFridrichsEnergy.h
index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644
--- a/src/Examples/flow-vl/LaxFridrichsEnergy.h
+++ b/src/Examples/flow-vl/LaxFridrichsEnergy.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumX.h b/src/Examples/flow-vl/LaxFridrichsMomentumX.h
index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644
--- a/src/Examples/flow-vl/LaxFridrichsMomentumX.h
+++ b/src/Examples/flow-vl/LaxFridrichsMomentumX.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumY.h b/src/Examples/flow-vl/LaxFridrichsMomentumY.h
index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644
--- a/src/Examples/flow-vl/LaxFridrichsMomentumY.h
+++ b/src/Examples/flow-vl/LaxFridrichsMomentumY.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h
index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644
--- a/src/Examples/flow-vl/LaxFridrichsMomentumZ.h
+++ b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h
index dfe63e07623a6fdb6ca7ebb7da8ab445d9505372..6a921539c13c4b0958f01ffab8b357435c43c969 100644
--- a/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h
index 07abfdbeb940039555ac2799d0ef374ca26faff0..f27dda7f46c5766a848c94664e652b002c8dbe51 100644
--- a/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h
index 83b6282ddd50033f32a382f0b48f5abe7347ccaa..2a8e06f2f611d11f545665b2a53f6b8f7a3f3cae 100644
--- a/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h
index a83dd653f92328814b8d0746bc45c8775552a310..35c01409cb3188d02ee617a313267fbdd88b85ac 100644
--- a/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h
index 9d887857ce97f916dcfaccd0208138afc200afd1..cf790d77d0e2090cce60dc3309094ca5122b0318 100644
--- a/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h
index 5fe6f22e5945513c9e9e86d835256ef84e27c054..a771ab84f84d2c5cf22b3d57d7ee9f2a91ef82bf 100644
--- a/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow-vl/UpwindContinuity.h b/src/Examples/flow-vl/UpwindContinuity.h
index 20bae4fbb49fe4d1510f95f0ef4c2404873903f7..fff04e9bb250bffb9c4021e801bb506486e038b5 100644
--- a/src/Examples/flow-vl/UpwindContinuity.h
+++ b/src/Examples/flow-vl/UpwindContinuity.h
@@ -37,14 +37,6 @@ class UpwindContinuityBase
       typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
-      static String getType()
-      {
-         return String( "UpwindContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow-vl/UpwindEnergy.h b/src/Examples/flow-vl/UpwindEnergy.h
index 8fa7a046a24d37c832746d6f17396e39582f7f5e..b4570e60829f5fa02f8d059123297f1a38ad7f53 100644
--- a/src/Examples/flow-vl/UpwindEnergy.h
+++ b/src/Examples/flow-vl/UpwindEnergy.h
@@ -36,14 +36,6 @@ class UpwindEnergyBase
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "UpwindEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow-vl/UpwindMomentumX.h b/src/Examples/flow-vl/UpwindMomentumX.h
index edd3756208121de465185a84693a10671e587bac..939e4f555587adb7b5e5e79b81c95e7601e393de 100644
--- a/src/Examples/flow-vl/UpwindMomentumX.h
+++ b/src/Examples/flow-vl/UpwindMomentumX.h
@@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -136,14 +127,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -268,14 +251,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-vl/UpwindMomentumY.h b/src/Examples/flow-vl/UpwindMomentumY.h
index 4b5a7bcb26d049c2773790857d3f79246488b55b..7a4d3d050654d4db86a00e1e00e2e882088285fe 100644
--- a/src/Examples/flow-vl/UpwindMomentumY.h
+++ b/src/Examples/flow-vl/UpwindMomentumY.h
@@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -238,14 +221,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-vl/UpwindMomentumZ.h b/src/Examples/flow-vl/UpwindMomentumZ.h
index 887eec977517e5850db2085835d8242d63605c96..c425887578dfd313d18ed8567d86044ef4f568dc 100644
--- a/src/Examples/flow-vl/UpwindMomentumZ.h
+++ b/src/Examples/flow-vl/UpwindMomentumZ.h
@@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow-vl/navierStokesProblem.h b/src/Examples/flow-vl/navierStokesProblem.h
index 51cc5f014f40fb51f45782c5332baf868487fa2e..dbac46e749a0eda0e34a926ba5d1af3a964e820c 100644
--- a/src/Examples/flow-vl/navierStokesProblem.h
+++ b/src/Examples/flow-vl/navierStokesProblem.h
@@ -57,8 +57,6 @@ class navierStokesProblem:
       typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
-      static String getTypeStatic();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/Examples/flow-vl/navierStokesProblem_impl.h b/src/Examples/flow-vl/navierStokesProblem_impl.h
index 886c9f03f4e981cd9533d72ba5f71809388c6438..96bdb48279af245451dd7c140c1675b71df87b05 100644
--- a/src/Examples/flow-vl/navierStokesProblem_impl.h
+++ b/src/Examples/flow-vl/navierStokesProblem_impl.h
@@ -30,18 +30,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename InviscidOperators,
-          typename Communicator >
-String
-navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >::
-getTypeStatic()
-{
-   return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/flow/DensityBoundaryConditionBoiler.h b/src/Examples/flow/DensityBoundaryConditionBoiler.h
index 6231f6780e24e3090c83832c9d47534a9c6104a8..e02f1b1e403c25802c9e8792daa972bec1faf071 100644
--- a/src/Examples/flow/DensityBoundaryConditionBoiler.h
+++ b/src/Examples/flow/DensityBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
diff --git a/src/Examples/flow/DensityBoundaryConditionCavity.h b/src/Examples/flow/DensityBoundaryConditionCavity.h
index c753d324a288a331dbfb9d73e341fe962f39889e..7611f682cd9f14e05c6c37f5e4b10a3743444373 100644
--- a/src/Examples/flow/DensityBoundaryConditionCavity.h
+++ b/src/Examples/flow/DensityBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
diff --git a/src/Examples/flow/EnergyBoundaryConditionBoiler.h b/src/Examples/flow/EnergyBoundaryConditionBoiler.h
index a99fdf0157bfcbca614374e8472ab9fe8a3b4f58..0090bc2452288f7af0b18fa7ce675cd252f4e423 100644
--- a/src/Examples/flow/EnergyBoundaryConditionBoiler.h
+++ b/src/Examples/flow/EnergyBoundaryConditionBoiler.h
@@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/EnergyBoundaryConditionCavity.h b/src/Examples/flow/EnergyBoundaryConditionCavity.h
index 60e55f4240ed1fcb8bc63e494c01faf61f899568..0ba8c80aae9c95ce3fef4cd3fb13a16fe15b7774 100644
--- a/src/Examples/flow/EnergyBoundaryConditionCavity.h
+++ b/src/Examples/flow/EnergyBoundaryConditionCavity.h
@@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/LaxFridrichsContinuity.h b/src/Examples/flow/LaxFridrichsContinuity.h
index bf3cc45ece7877291a53cb460dd874fa77bbd250..8a9d22c6a3aaf9d38713f0e0f91d3a859476e9c1 100644
--- a/src/Examples/flow/LaxFridrichsContinuity.h
+++ b/src/Examples/flow/LaxFridrichsContinuity.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/flow/LaxFridrichsContinuityEuler.h b/src/Examples/flow/LaxFridrichsContinuityEuler.h
index f444a4e2541e76addb5c7a3eba87cf1d946ee4fa..ce175d8071028227427e603190862768394a96fa 100644
--- a/src/Examples/flow/LaxFridrichsContinuityEuler.h
+++ b/src/Examples/flow/LaxFridrichsContinuityEuler.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/flow/LaxFridrichsEnergy.h b/src/Examples/flow/LaxFridrichsEnergy.h
index dd940243d7fbaa59ae66d013451cd24c2def8488..630a985fe4c2b2b3637ef8ece3579e13ef25bd2e 100644
--- a/src/Examples/flow/LaxFridrichsEnergy.h
+++ b/src/Examples/flow/LaxFridrichsEnergy.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow/LaxFridrichsEnergyEuler.h b/src/Examples/flow/LaxFridrichsEnergyEuler.h
index 30180639d18c4d36b688eb60d597a571e5647115..37cd793a7be8fa94a13d2979a390470c779940c2 100644
--- a/src/Examples/flow/LaxFridrichsEnergyEuler.h
+++ b/src/Examples/flow/LaxFridrichsEnergyEuler.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/flow/LaxFridrichsMomentumX.h b/src/Examples/flow/LaxFridrichsMomentumX.h
index 3e295c029f9bc4ae61dfc54650be4c4aff55cf18..8fe02be4033c9bd259d22133bff7175a653cd878 100644
--- a/src/Examples/flow/LaxFridrichsMomentumX.h
+++ b/src/Examples/flow/LaxFridrichsMomentumX.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -128,14 +119,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -242,14 +225,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow/LaxFridrichsMomentumXEuler.h b/src/Examples/flow/LaxFridrichsMomentumXEuler.h
index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644
--- a/src/Examples/flow/LaxFridrichsMomentumXEuler.h
+++ b/src/Examples/flow/LaxFridrichsMomentumXEuler.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow/LaxFridrichsMomentumY.h b/src/Examples/flow/LaxFridrichsMomentumY.h
index 0df12c5227981b42b64437a4be96a511cf1b5991..61c3e09dc6a12f91241aa6308c61918c8cce5900 100644
--- a/src/Examples/flow/LaxFridrichsMomentumY.h
+++ b/src/Examples/flow/LaxFridrichsMomentumY.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -222,14 +205,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow/LaxFridrichsMomentumYEuler.h b/src/Examples/flow/LaxFridrichsMomentumYEuler.h
index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644
--- a/src/Examples/flow/LaxFridrichsMomentumYEuler.h
+++ b/src/Examples/flow/LaxFridrichsMomentumYEuler.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow/LaxFridrichsMomentumZ.h b/src/Examples/flow/LaxFridrichsMomentumZ.h
index e4f8501ec1f3c44f1a39fb2a5aa85de5209f9635..37056b5ae2e293781ba68270cac559fad8f4b06b 100644
--- a/src/Examples/flow/LaxFridrichsMomentumZ.h
+++ b/src/Examples/flow/LaxFridrichsMomentumZ.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow/LaxFridrichsMomentumZEuler.h b/src/Examples/flow/LaxFridrichsMomentumZEuler.h
index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644
--- a/src/Examples/flow/LaxFridrichsMomentumZEuler.h
+++ b/src/Examples/flow/LaxFridrichsMomentumZEuler.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/flow/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h
index dfe63e07623a6fdb6ca7ebb7da8ab445d9505372..6a921539c13c4b0958f01ffab8b357435c43c969 100644
--- a/src/Examples/flow/MomentumXBoundaryConditionBoiler.h
+++ b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/MomentumXBoundaryConditionCavity.h b/src/Examples/flow/MomentumXBoundaryConditionCavity.h
index 07abfdbeb940039555ac2799d0ef374ca26faff0..f27dda7f46c5766a848c94664e652b002c8dbe51 100644
--- a/src/Examples/flow/MomentumXBoundaryConditionCavity.h
+++ b/src/Examples/flow/MomentumXBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h
index 83b6282ddd50033f32a382f0b48f5abe7347ccaa..2a8e06f2f611d11f545665b2a53f6b8f7a3f3cae 100644
--- a/src/Examples/flow/MomentumYBoundaryConditionBoiler.h
+++ b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/MomentumYBoundaryConditionCavity.h b/src/Examples/flow/MomentumYBoundaryConditionCavity.h
index a83dd653f92328814b8d0746bc45c8775552a310..35c01409cb3188d02ee617a313267fbdd88b85ac 100644
--- a/src/Examples/flow/MomentumYBoundaryConditionCavity.h
+++ b/src/Examples/flow/MomentumYBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h
index 9d887857ce97f916dcfaccd0208138afc200afd1..cf790d77d0e2090cce60dc3309094ca5122b0318 100644
--- a/src/Examples/flow/MomentumZBoundaryConditionBoiler.h
+++ b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h
@@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/MomentumZBoundaryConditionCavity.h b/src/Examples/flow/MomentumZBoundaryConditionCavity.h
index 5fe6f22e5945513c9e9e86d835256ef84e27c054..a771ab84f84d2c5cf22b3d57d7ee9f2a91ef82bf 100644
--- a/src/Examples/flow/MomentumZBoundaryConditionCavity.h
+++ b/src/Examples/flow/MomentumZBoundaryConditionCavity.h
@@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
    typedef Containers::StaticVector< 1, RealType > PointType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
    typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 2, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
@@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef Containers::StaticVector< 3, RealType > PointType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
       typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
diff --git a/src/Examples/flow/navierStokesProblem.h b/src/Examples/flow/navierStokesProblem.h
index f42c2ed09908b4c73cfab906f9fcecfe5101201d..71e8243cdac7084aab484f81be18acbc33baad0b 100644
--- a/src/Examples/flow/navierStokesProblem.h
+++ b/src/Examples/flow/navierStokesProblem.h
@@ -57,8 +57,6 @@ class navierStokesProblem:
       typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
-      static String getTypeStatic();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/Examples/flow/navierStokesProblem_impl.h b/src/Examples/flow/navierStokesProblem_impl.h
index 4b0c7977441e87cab05fccab2c3984705670cfd4..c2c84e7a6da2ad965c3c9afd454560d56e12dfa4 100644
--- a/src/Examples/flow/navierStokesProblem_impl.h
+++ b/src/Examples/flow/navierStokesProblem_impl.h
@@ -42,18 +42,6 @@
 */
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename InviscidOperators,
-          typename Communicator >
-String
-navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >::
-getTypeStatic()
-{
-   return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/heat-equation/CMakeLists.txt b/src/Examples/heat-equation/CMakeLists.txt
index c895199060481cd4e20e22c30a4a5b473faeb552..979c34076cd5588921cfaea29e10d4ef712f7a79 100644
--- a/src/Examples/heat-equation/CMakeLists.txt
+++ b/src/Examples/heat-equation/CMakeLists.txt
@@ -1,18 +1,17 @@
-set( tnl_heat_equation_SOURCES     
+set( tnl_heat_equation_SOURCES
      tnl-heat-equation.cpp
      tnl-heat-equation-eoc.cpp
      tnl-heat-equation.cu
      tnl-heat-equation-eoc.cu )
-               
+
 IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cu)
    CUDA_ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cu)
    target_link_libraries (tnl-heat-equation ${CUSPARSE_LIBRARY} )
    target_link_libraries (tnl-heat-equation-eoc-test ${CUSPARSE_LIBRARY} )
-ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp)     
-   ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp)   
-   TARGET_COMPILE_DEFINITIONS( tnl-heat-equation PUBLIC ${MIC_CXX_FLAGS} )
+ELSE(  BUILD_CUDA )
+   ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp)
+   ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp)
 ENDIF( BUILD_CUDA )
 
 
@@ -20,7 +19,7 @@ INSTALL( TARGETS tnl-heat-equation
                  tnl-heat-equation-eoc-test
          RUNTIME DESTINATION bin
          PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
-        
+
 INSTALL( FILES tnl-run-heat-equation-eoc-test
                tnl-run-heat-equation
                ${tnl_heat_equation_SOURCES}
diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h
index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644
--- a/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h
index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644
--- a/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h
index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644
--- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h
index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644
--- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h
index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644
--- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-sw/UpwindContinuity.h b/src/Examples/inviscid-flow-sw/UpwindContinuity.h
index 22fc4ffc5d97e2933d7db36ceac3f66d5a33b63f..6a763635cb2ec8d08c2bd8549b89356bb4bed8be 100644
--- a/src/Examples/inviscid-flow-sw/UpwindContinuity.h
+++ b/src/Examples/inviscid-flow-sw/UpwindContinuity.h
@@ -37,14 +37,6 @@ class UpwindContinuityBase
       typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
-      static String getType()
-      {
-         return String( "UpwindContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow-sw/UpwindEnergy.h b/src/Examples/inviscid-flow-sw/UpwindEnergy.h
index 39f6090064075b0dd688e6105ffac94c14421cde..7472790dbd8d7d65618a724d68e0f82d5948815e 100644
--- a/src/Examples/inviscid-flow-sw/UpwindEnergy.h
+++ b/src/Examples/inviscid-flow-sw/UpwindEnergy.h
@@ -36,14 +36,6 @@ class UpwindEnergyBase
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "UpwindEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumX.h b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h
index ed49dda94585e64f85d820569a757d849757e6ca..1a887e7a51961c37897e9df73c4dbf2c9838156d 100644
--- a/src/Examples/inviscid-flow-sw/UpwindMomentumX.h
+++ b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h
@@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -130,14 +121,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -236,14 +219,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumY.h b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h
index c2126d43af781289f86999a5f4a7f8d24ad5c6e8..2ab8ffe82aecfff8e97391a8ed2dcd8385741648 100644
--- a/src/Examples/inviscid-flow-sw/UpwindMomentumY.h
+++ b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h
@@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -212,14 +195,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h
index 97339e804b3bda5203d0b12feeb59e30249f2327..fe8be0eb20cc14fd491cc92d4df44e6dc737acb4 100644
--- a/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h
+++ b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h
@@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-sw/eulerProblem.h b/src/Examples/inviscid-flow-sw/eulerProblem.h
index a91e56176dbe8e5b507a61a8d27aaa5050855693..5c10ab7fb11417d637c89988ef29fde31ecd97f5 100644
--- a/src/Examples/inviscid-flow-sw/eulerProblem.h
+++ b/src/Examples/inviscid-flow-sw/eulerProblem.h
@@ -57,8 +57,6 @@ class eulerProblem:
       typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
-      static String getTypeStatic();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/Examples/inviscid-flow-sw/eulerProblem_impl.h b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h
index e0382e9c2485bbec5740df99af47b87a28122139..d4f119d4c962bc6a2047e6401347fd8cda16c552 100644
--- a/src/Examples/inviscid-flow-sw/eulerProblem_impl.h
+++ b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h
@@ -30,18 +30,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename InviscidOperators,
-          typename Communicator >
-String
-eulerProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >::
-getTypeStatic()
-{
-   return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h
index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644
--- a/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h
index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644
--- a/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h
index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644
--- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h
index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644
--- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h
index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644
--- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-vl/UpwindContinuity.h b/src/Examples/inviscid-flow-vl/UpwindContinuity.h
index 4a21cd502b3f54f898a25ef85be84dcf52a52f5d..3d60dfd9fc877e641cb10f0b7bc018c312254be3 100644
--- a/src/Examples/inviscid-flow-vl/UpwindContinuity.h
+++ b/src/Examples/inviscid-flow-vl/UpwindContinuity.h
@@ -37,14 +37,6 @@ class UpwindContinuityBase
       typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
-      static String getType()
-      {
-         return String( "UpwindContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow-vl/UpwindEnergy.h b/src/Examples/inviscid-flow-vl/UpwindEnergy.h
index e3857cbcdf2e29110d01e3f6140a1f7da6b9f0b6..ce26148d7acf53571770dfa997f2fd251265ed9b 100644
--- a/src/Examples/inviscid-flow-vl/UpwindEnergy.h
+++ b/src/Examples/inviscid-flow-vl/UpwindEnergy.h
@@ -36,14 +36,6 @@ class UpwindEnergyBase
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "UpwindEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumX.h b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h
index ed49dda94585e64f85d820569a757d849757e6ca..cc7a01bc9662ea69b897de453083eade365a963c 100644
--- a/src/Examples/inviscid-flow-vl/UpwindMomentumX.h
+++ b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h
@@ -47,15 +47,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
       
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
@@ -130,14 +121,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -236,14 +219,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumY.h b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h
index c2126d43af781289f86999a5f4a7f8d24ad5c6e8..2ab8ffe82aecfff8e97391a8ed2dcd8385741648 100644
--- a/src/Examples/inviscid-flow-vl/UpwindMomentumY.h
+++ b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h
@@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -212,14 +195,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h
index 97339e804b3bda5203d0b12feeb59e30249f2327..fe8be0eb20cc14fd491cc92d4df44e6dc737acb4 100644
--- a/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h
+++ b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h
@@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "UpwindMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow-vl/eulerProblem.h b/src/Examples/inviscid-flow-vl/eulerProblem.h
index a91e56176dbe8e5b507a61a8d27aaa5050855693..5c10ab7fb11417d637c89988ef29fde31ecd97f5 100644
--- a/src/Examples/inviscid-flow-vl/eulerProblem.h
+++ b/src/Examples/inviscid-flow-vl/eulerProblem.h
@@ -57,8 +57,6 @@ class eulerProblem:
       typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
-      static String getTypeStatic();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/Examples/inviscid-flow-vl/eulerProblem_impl.h b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h
index e0382e9c2485bbec5740df99af47b87a28122139..d4f119d4c962bc6a2047e6401347fd8cda16c552 100644
--- a/src/Examples/inviscid-flow-vl/eulerProblem_impl.h
+++ b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h
@@ -30,18 +30,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename InviscidOperators,
-          typename Communicator >
-String
-eulerProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >::
-getTypeStatic()
-{
-   return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h
index 2e79798a31a7073241903891d0317502c8494a60..f87c9103372e55dec8597695146e43e316ee91a8 100644
--- a/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h
+++ b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h
@@ -21,8 +21,6 @@ class EulerVelXGetter
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
-      
       EulerVelXGetter( const MeshFunctionType& rho,
                        const MeshFunctionType& rhoVel)
       : rho( rho ), rhoVel( rhoVel )
diff --git a/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h
index 2e79798a31a7073241903891d0317502c8494a60..f87c9103372e55dec8597695146e43e316ee91a8 100644
--- a/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h
+++ b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h
@@ -21,8 +21,6 @@ class EulerVelXGetter
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
-      
       EulerVelXGetter( const MeshFunctionType& rho,
                        const MeshFunctionType& rhoVel)
       : rho( rho ), rhoVel( rhoVel )
diff --git a/src/Examples/inviscid-flow/3d/EulerPressureGetter.h b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h
index 45611c64754aa161274bdabe95cd0c60565ef2c1..5a39ca84dd28965165d4c6890acb5e24cc3cf9f2 100644
--- a/src/Examples/inviscid-flow/3d/EulerPressureGetter.h
+++ b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h
@@ -22,8 +22,6 @@ class EulerPressureGetter
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
-      
       EulerPressureGetter( const MeshFunctionType& rho,
                            const MeshFunctionType& rhoVelX,
                            const MeshFunctionType& rhoVelY,
diff --git a/src/Examples/inviscid-flow/3d/EulerVelGetter.h b/src/Examples/inviscid-flow/3d/EulerVelGetter.h
index 24d06eaf5f0ce322c17086ea2cf04495c96bd3e7..82441fcaed012d17181a55c7c26a27457bbc7d58 100644
--- a/src/Examples/inviscid-flow/3d/EulerVelGetter.h
+++ b/src/Examples/inviscid-flow/3d/EulerVelGetter.h
@@ -21,8 +21,6 @@ class EulerVelGetter
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
-      
       EulerVelGetter( const MeshFunctionType& rho,
                       const MeshFunctionType& rhoVelX,
                       const MeshFunctionType& rhoVelY,
diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h
index ac469a52407477aab2d8a60c3eca92e5d168a253..840fc26803197babe5f5b652f4cf1e7d2f9233ec 100644
--- a/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h	
+++ b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h	
@@ -6,21 +6,6 @@ namespace TNL {
 /****
  * 1D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-LaxFridrichsContinuity< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "LaxFridrichsContinuity< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -109,21 +94,6 @@ updateLinearSystem( const RealType& time,
 /****
  * 2D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-LaxFridrichsContinuity< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "LaxFridrichsContinuity< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -224,21 +194,6 @@ updateLinearSystem( const RealType& time,
 /****
  * 3D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-LaxFridrichsContinuity< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "LaxFridrichsContinuity< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h
index 9083970ae4bb036b2be46e8556dab2e4f8eb2607..9756f46c86fafd204cbe47da573f0aee5cae6a9a 100644
--- a/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h
+++ b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h
@@ -29,7 +29,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, I
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
@@ -101,7 +100,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, I
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
@@ -173,7 +171,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, I
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h
index 5d1cf919252d85f00f08657e7e2944402940615a..33e9c33ac2e6fe1fc866ffa556cf17d281e5a61b 100644
--- a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h
+++ b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h
@@ -29,7 +29,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
@@ -101,7 +100,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
@@ -173,7 +171,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h
index 0d7882f6deaf9d5009775ffa2c9a1ca57d4261db..63be3651052960ff2f786d1c28983fe16f3d6032 100644
--- a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h
+++ b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h
@@ -29,7 +29,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
@@ -101,7 +100,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
@@ -173,7 +171,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       enum { Dimensions = MeshType::getMeshDimensions() };
 
-      static String getType();
       Real tau;
       MeshFunctionType velocityX;
       MeshFunctionType velocityY;
diff --git a/src/Examples/inviscid-flow/3d/eulerProblem.h b/src/Examples/inviscid-flow/3d/eulerProblem.h
index d2ff1fc109c6d548ac503d82e398aa7c5b19c066..429c9d94848b109d18b549e3ad4a7ec0da3a9895 100644
--- a/src/Examples/inviscid-flow/3d/eulerProblem.h
+++ b/src/Examples/inviscid-flow/3d/eulerProblem.h
@@ -45,8 +45,6 @@ class eulerProblem:
       typedef typename DifferentialOperator::VelocityX VelocityX;
       typedef typename DifferentialOperator::Pressure Pressure;
 
-      static String getTypeStatic();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/Examples/inviscid-flow/3d/eulerProblem_impl.h b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h
index b01979b87c6218f6e0ff1018caa8f05467bdfd1c..10953ebe2c68e2e32a57364e01206f884a12d9d4 100644
--- a/src/Examples/inviscid-flow/3d/eulerProblem_impl.h
+++ b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h
@@ -14,17 +14,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename DifferentialOperator >
-String
-eulerProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
-getTypeStatic()
-{
-   return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h
index 0ae10b4f9399fc41c97d5ad35fba4748f98bef95..93e52f04ef8d8095b459521f8f8627afd19dbf16 100644
--- a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h
+++ b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h
@@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
 
       void setTau(const Real& tau)
       {
diff --git a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h
index 8c6791cd17516d877a206adff346900fc80d2462..a9bb4148ce25c3ef9618680a7fea29a094e32c81 100644
--- a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h
+++ b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h
@@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
 
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
       void setTau(const Real& tau)
       {
           this->tau = tau;
diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h
index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644
--- a/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h
+++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumX< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h
index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644
--- a/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h
+++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumY< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h
index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644
--- a/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h
+++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h
@@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
@@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
       using typename BaseType::VelocityFieldType;
       using typename BaseType::VelocityFieldPointer;
       using BaseType::Dimensions;      
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsMomentumZ< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }      
 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
diff --git a/src/Examples/inviscid-flow/eulerProblem.h b/src/Examples/inviscid-flow/eulerProblem.h
index a854f8098e751d65d9f1e542c540d653b1fb08c1..dfc7be55908efc552013e92e753a3beafe887d46 100644
--- a/src/Examples/inviscid-flow/eulerProblem.h
+++ b/src/Examples/inviscid-flow/eulerProblem.h
@@ -56,8 +56,6 @@ class eulerProblem:
       typedef Pointers::SharedPointer<  BoundaryCondition > BoundaryConditionPointer;
       typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
 
-      static String getType();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/Examples/inviscid-flow/eulerProblem_impl.h b/src/Examples/inviscid-flow/eulerProblem_impl.h
index fd64ae2846e347bdadd878c0a3f4052fdbefc522..d203a16ad8ae1b25995d1f446856f328558b777e 100644
--- a/src/Examples/inviscid-flow/eulerProblem_impl.h
+++ b/src/Examples/inviscid-flow/eulerProblem_impl.h
@@ -31,18 +31,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename Communicator,
-          typename InviscidOperators >
-String
-eulerProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, InviscidOperators >::
-getType()
-{
-   return String( "eulerProblem< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/navier-stokes/navierStokesSetter_impl.h b/src/Examples/navier-stokes/navierStokesSetter_impl.h
index 5109284a640553857249865b1232ae007a7ec314..a0369516cf8d4c7120ccef8c6837e380064f97ae 100644
--- a/src/Examples/navier-stokes/navierStokesSetter_impl.h
+++ b/src/Examples/navier-stokes/navierStokesSetter_impl.h
@@ -29,7 +29,7 @@ template< typename MeshType, typename SolverStarter >
              typename IndexType >
 bool navierStokesSetter< MeshType, SolverStarter > :: run( const Config::ParameterContainer& parameters )
 {
-   std::cerr << "The solver is not implemented for the mesh " << MeshType::getType() << "." << std::endl;
+   std::cerr << "The solver is not implemented for the mesh " << getType< MeshType >() << "." << std::endl;
    return false;
 }
 
diff --git a/src/Examples/navier-stokes/navierStokesSolver.h b/src/Examples/navier-stokes/navierStokesSolver.h
index 262d9d4802752de0d53235bb0a9ad6afd8463a06..c0166701abb96fa6cb76d016a8d26a821bab0769 100644
--- a/src/Examples/navier-stokes/navierStokesSolver.h
+++ b/src/Examples/navier-stokes/navierStokesSolver.h
@@ -55,8 +55,6 @@ class navierStokesSolver
 
    navierStokesSolver();
 
-   static String getType();
-
    String getPrologHeader() const;
 
    void writeProlog( Logger& logger,
diff --git a/src/Examples/navier-stokes/navierStokesSolver_impl.h b/src/Examples/navier-stokes/navierStokesSolver_impl.h
index d4120d38048160d1361bd56106c065cc022f43be..a42c7b3177330af85c003a3e5f9fb6b5385ce91f 100644
--- a/src/Examples/navier-stokes/navierStokesSolver_impl.h
+++ b/src/Examples/navier-stokes/navierStokesSolver_impl.h
@@ -285,13 +285,6 @@ SolverMonitor*
    return &solverMonitor;
 }
 
-template< typename Mesh, typename EulerScheme >
-String navierStokesSolver< Mesh, EulerScheme > :: getType()
-{
-   return String( "navierStokesSolver< " ) +
-          Mesh :: getType() + " >";
-}
-
 template< typename Mesh, typename EulerScheme >
 String navierStokesSolver< Mesh, EulerScheme > :: getPrologHeader() const
 {
diff --git a/src/Examples/simple-examples/large-meshfunction-example.h b/src/Examples/simple-examples/large-meshfunction-example.h
index 2f9c70b859606f6f02b3689e9cd74e6e87edd7ae..d5520b69e1af3ce54952d49c463c9b85d345903a 100644
--- a/src/Examples/simple-examples/large-meshfunction-example.h
+++ b/src/Examples/simple-examples/large-meshfunction-example.h
@@ -10,7 +10,6 @@ using namespace TNL;
 using namespace TNL::Containers;
 using namespace TNL::Meshes;
 using namespace TNL::Functions;
-using namespace TNL::Devices;
 
 int main(int argc, char ** argv)
 {
@@ -28,9 +27,9 @@ int main(int argc, char ** argv)
     time.start();
 
 #ifdef HAVE_CUDA
-    using Device=Cuda;
+    using Device=Devices::Cuda;
 #else
-    using Device=Host;
+    using Device=Devices::Host;
 #endif
 
   using MeshType= Grid<2, double,Device,int>;
diff --git a/src/Examples/transport-equation/transportEquationProblem.h b/src/Examples/transport-equation/transportEquationProblem.h
index b6aa381d5f1aca5fc004c6274a4128ac28a1c791..802100228cb067423a768d4b5507f0e8045e242d 100644
--- a/src/Examples/transport-equation/transportEquationProblem.h
+++ b/src/Examples/transport-equation/transportEquationProblem.h
@@ -50,8 +50,6 @@ public PDEProblem< Mesh,
       using typename BaseType::MeshPointer;
       using typename BaseType::DofVectorType;
       using typename BaseType::DofVectorPointer;
-      
-      static String getType();
 
       String getPrologHeader() const;
 
diff --git a/src/Examples/transport-equation/transportEquationProblemEoc.h b/src/Examples/transport-equation/transportEquationProblemEoc.h
index 62f10e273f95938a48d28ba85230d7463c0db4ce..279af6006e4ec568e564e47aa38fd202a388c04c 100644
--- a/src/Examples/transport-equation/transportEquationProblemEoc.h
+++ b/src/Examples/transport-equation/transportEquationProblemEoc.h
@@ -49,8 +49,6 @@ public transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communi
       using typename BaseType::DofVectorPointer;
       
       //using BaseType::getExplicitUpdate;
-      
-      static String getType();
 
       String getPrologHeader() const;
 
diff --git a/src/Examples/transport-equation/transportEquationProblemEoc_impl.h b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h
index 0ac3af2d8d963ea89cbb9a40837cc298c243fa40..8de7eb9bccb68630f7a53213e64f4c391c6f0489 100644
--- a/src/Examples/transport-equation/transportEquationProblemEoc_impl.h
+++ b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h
@@ -23,18 +23,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-String
-transportEquationProblemEoc< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
-getType()
-{
-   return String( "transportEquationProblemEoc< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Examples/transport-equation/transportEquationProblem_impl.h b/src/Examples/transport-equation/transportEquationProblem_impl.h
index 7d83ceb0987466f1057df129b1b51d45b163812c..96cf1a6ec263029f783c9a95e87327927d13ce71 100644
--- a/src/Examples/transport-equation/transportEquationProblem_impl.h
+++ b/src/Examples/transport-equation/transportEquationProblem_impl.h
@@ -21,18 +21,6 @@
 
 namespace TNL {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-String
-transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
-getType()
-{
-   return String( "transportEquationProblem< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Python/pytnl/tnl/Array.h b/src/Python/pytnl/tnl/Array.h
index acebce3d22701f68b2a8beb831292fb997ed0a56..6e19878de07d6986d6a6dea1c109371b1c1b59d6 100644
--- a/src/Python/pytnl/tnl/Array.h
+++ b/src/Python/pytnl/tnl/Array.h
@@ -15,8 +15,6 @@ void export_Array(py::module & m, const char* name)
     auto array = py::class_<ArrayType>(m, name, py::buffer_protocol())
         .def(py::init<>())
         .def(py::init<int>())
-        .def_static("getType",              &ArrayType::getType)
-        .def("getTypeVirtual",              &ArrayType::getTypeVirtual)
         .def_static("getSerializationType", &ArrayType::getSerializationType)
         .def("getSerializationTypeVirtual", &ArrayType::getSerializationTypeVirtual)
         .def("setSize", &ArrayType::setSize)
diff --git a/src/Python/pytnl/tnl/Grid.h b/src/Python/pytnl/tnl/Grid.h
index afc5b39749362a08248befb7716c8f446e888dad..8cf28a8f5bd393dfda5bfc01b6547c77ad66ba91 100644
--- a/src/Python/pytnl/tnl/Grid.h
+++ b/src/Python/pytnl/tnl/Grid.h
@@ -59,8 +59,6 @@ void export_Grid( py::module & m, const char* name )
     auto grid = py::class_<Grid, TNL::Object>( m, name )
         .def(py::init<>())
         .def_static("getMeshDimension", &Grid::getMeshDimension)
-        .def_static("getType",              &Grid::getType)
-        .def("getTypeVirtual",              &Grid::getTypeVirtual)
         .def_static("getSerializationType", &Grid::getSerializationType)
         .def("getSerializationTypeVirtual", &Grid::getSerializationTypeVirtual)
         // FIXME: number of parameters depends on the grid dimension
diff --git a/src/Python/pytnl/tnl/Mesh.h b/src/Python/pytnl/tnl/Mesh.h
index ee17a134804fc2fb71d69f5e81bd2838d57428e2..c0207e243ceae80613ea634b73ff429b94d697d2 100644
--- a/src/Python/pytnl/tnl/Mesh.h
+++ b/src/Python/pytnl/tnl/Mesh.h
@@ -112,8 +112,6 @@ void export_Mesh( py::module & m, const char* name )
     auto mesh = py::class_< Mesh, TNL::Object >( m, name )
         .def(py::init<>())
         .def_static("getMeshDimension", &Mesh::getMeshDimension)
-        .def_static("getType",              &Mesh::getType)
-        .def("getTypeVirtual",              &Mesh::getTypeVirtual)
         .def_static("getSerializationType", &Mesh::getSerializationType)
         .def("getSerializationTypeVirtual", &Mesh::getSerializationTypeVirtual)
         .def("getEntitiesCount", &mesh_getEntitiesCount< Mesh >)
diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index 6788d1a68ffd738aef1ca395af9d88b3082b98bc..1a32bd257f52a14f07579abe3671df1978cfc4d2 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -56,8 +56,6 @@ void export_Matrix( py::module & m, const char* name )
     auto matrix = py::class_< Matrix, TNL::Object >( m, name )
         .def(py::init<>())
         // overloads (defined in Object)
-        .def_static("getType",              &Matrix::getType)
-        .def("getTypeVirtual",              &Matrix::getTypeVirtual)
         .def_static("getSerializationType", &Matrix::getSerializationType)
         .def("getSerializationTypeVirtual", &Matrix::getSerializationTypeVirtual)
         .def("print", &Matrix::print)
diff --git a/src/Python/pytnl/tnl/StaticVector.h b/src/Python/pytnl/tnl/StaticVector.h
index 6b5570647fd31f7c2c8692b9c5b7322227ea7781..ba7cfcaf4f8143c246fe7afb9db8facd03d2621d 100644
--- a/src/Python/pytnl/tnl/StaticVector.h
+++ b/src/Python/pytnl/tnl/StaticVector.h
@@ -14,7 +14,6 @@ void export_StaticVector( Scope & scope, const char* name )
     auto vector = py::class_<VectorType>(scope, name)
         .def(py::init< RealType >())
         .def(py::init< VectorType >())
-        .def_static("getType", &VectorType::getType)
         .def("getSize", &VectorType::getSize)
         // operator=
         .def("assign", []( VectorType& vector, const VectorType& other ) -> VectorType& {
diff --git a/src/Python/pytnl/tnl/String.cpp b/src/Python/pytnl/tnl/String.cpp
index f9fff7d920e49f4eedd55ad9ab5190b644171eaf..3203abda283d55ed20dc2a0eb96100bf84b65cc3 100644
--- a/src/Python/pytnl/tnl/String.cpp
+++ b/src/Python/pytnl/tnl/String.cpp
@@ -16,7 +16,6 @@ void export_String( py::module & m )
         .def(py::init<const char*, int, int>())
         .def(py::init([](int v){ return TNL::convertToString(v); }))
         .def(py::init([](double v){ return TNL::convertToString(v); }))
-        .def_static("getType", &TNL::String::getType)
         // __str__ (uses operator<<)
         // explicit namespace resolution is necessary, see http://stackoverflow.com/a/3084341/4180822
 //        .def(py::self_ns::str(py::self_ns::self))
diff --git a/src/Python/pytnl/tnl/Vector.h b/src/Python/pytnl/tnl/Vector.h
index 9fdac4072b4db7cac83c4eb95739a92a6abe671d..475a53736a97357cc6363d3f56e07b4308caecae 100644
--- a/src/Python/pytnl/tnl/Vector.h
+++ b/src/Python/pytnl/tnl/Vector.h
@@ -14,8 +14,6 @@ void export_Vector(py::module & m, const char* name)
     py::class_<VectorType, ArrayType>(m, name)
         .def(py::init<>())
         .def(py::init<int>())
-        .def_static("getType",              &VectorType::getType)
-        .def("getTypeVirtual",              &VectorType::getTypeVirtual)
         .def_static("getSerializationType", &VectorType::getSerializationType)
         .def("getSerializationTypeVirtual", &VectorType::getSerializationTypeVirtual)
         .def(py::self == py::self)
diff --git a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h b/src/TNL/Algorithms/CudaMultireductionKernel.h
similarity index 93%
rename from src/TNL/Containers/Algorithms/CudaMultireductionKernel.h
rename to src/TNL/Algorithms/CudaMultireductionKernel.h
index e67c11b419dca3ae83706d43c56f5f282aba4beb..6a078564796d42e28b2c896d8b0589acd1e2cc79 100644
--- a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h
+++ b/src/TNL/Algorithms/CudaMultireductionKernel.h
@@ -14,12 +14,12 @@
 
 #include <TNL/Assert.h>
 #include <TNL/Math.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
-#include <TNL/Containers/Algorithms/CudaReductionBuffer.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/Cuda/SharedMemory.h>
+#include <TNL/Algorithms/CudaReductionBuffer.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 #ifdef HAVE_CUDA
@@ -52,7 +52,7 @@ CudaMultireductionKernel( const Result zero,
                           const int n,
                           Result* output )
 {
-   Result* sdata = Devices::Cuda::getSharedMemory< Result >();
+   Result* sdata = Cuda::getSharedMemory< Result >();
 
    // Get the thread id (tid), global thread id (gid) and gridSize.
    const Index tid = threadIdx.y * blockDim.x + threadIdx.x;
@@ -160,10 +160,10 @@ CudaMultireductionKernelLauncher( const Result zero,
    // where blocksPerMultiprocessor is determined according to the number of
    // available registers on the multiprocessor.
    // On Tesla K40c, desGridSize = 8 * 15 = 120.
-   const int activeDevice = Devices::CudaDeviceInfo::getActiveDevice();
-   const int blocksdPerMultiprocessor = Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice )
+   const int activeDevice = Cuda::DeviceInfo::getActiveDevice();
+   const int blocksdPerMultiprocessor = Cuda::DeviceInfo::getRegistersPerMultiprocessor( activeDevice )
                                       / ( Multireduction_maxThreadsPerBlock * Multireduction_registersPerThread );
-   const int desGridSizeX = blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice );
+   const int desGridSizeX = blocksdPerMultiprocessor * Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice );
    dim3 blockSize, gridSize;
 
    // version A: max 16 rows of threads
@@ -189,10 +189,10 @@ CudaMultireductionKernelLauncher( const Result zero,
    while( blockSize.x * blockSize.y > Multireduction_maxThreadsPerBlock )
       blockSize.x /= 2;
 
-   gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX );
-   gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y );
+   gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX );
+   gridSize.y = Cuda::getNumberOfBlocks( n, blockSize.y );
 
-   if( gridSize.y > (unsigned) Devices::Cuda::getMaxGridSize() ) {
+   if( gridSize.y > (unsigned) Cuda::getMaxGridSize() ) {
       std::cerr << "Maximum gridSize.y limit exceeded (limit is 65535, attempted " << gridSize.y << ")." << std::endl;
       throw 1;
    }
@@ -281,5 +281,4 @@ CudaMultireductionKernelLauncher( const Result zero,
 }
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h b/src/TNL/Algorithms/CudaReductionBuffer.h
similarity index 96%
rename from src/TNL/Containers/Algorithms/CudaReductionBuffer.h
rename to src/TNL/Algorithms/CudaReductionBuffer.h
index 2897c7280a6bc61f9b60a9cb3c7b44a94ad20de3..af9b3fcc254fa11384a83a353d383d9e6ddefc6b 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h
+++ b/src/TNL/Algorithms/CudaReductionBuffer.h
@@ -14,12 +14,11 @@
 
 #include <stdlib.h>
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CheckDevice.h>
 #include <TNL/Exceptions/CudaBadAlloc.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 class CudaReductionBuffer
@@ -92,5 +91,4 @@ class CudaReductionBuffer
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h
similarity index 95%
rename from src/TNL/Containers/Algorithms/CudaReductionKernel.h
rename to src/TNL/Algorithms/CudaReductionKernel.h
index 82b030e1a9198eebee91609db3c384d69e237079..b97295e0004c1f5c2ccf1ca2155bca76251b44cd 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Algorithms/CudaReductionKernel.h
@@ -14,13 +14,13 @@
 
 #include <TNL/Assert.h>
 #include <TNL/Math.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
-#include <TNL/Containers/Algorithms/CudaReductionBuffer.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/Cuda/SharedMemory.h>
+#include <TNL/Algorithms/CudaReductionBuffer.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 /****
@@ -52,7 +52,7 @@ CudaReductionKernel( const Result zero,
                      const Index size,
                      Result* output )
 {
-   Result* sdata = Devices::Cuda::getSharedMemory< Result >();
+   Result* sdata = Cuda::getSharedMemory< Result >();
 
    // Get the thread id (tid), global thread id (gid) and gridSize.
    const Index tid = threadIdx.x;
@@ -147,7 +147,7 @@ CudaReductionWithArgumentKernel( const Result zero,
                                  Index* idxOutput,
                                  const Index* idxInput = nullptr )
 {
-   Result* sdata = Devices::Cuda::getSharedMemory< Result >();
+   Result* sdata = Cuda::getSharedMemory< Result >();
    Index* sidx = reinterpret_cast< Index* >( &sdata[ blockDim.x ] );
 
    // Get the thread id (tid), global thread id (gid) and gridSize.
@@ -282,11 +282,11 @@ struct CudaReductionKernelLauncher
    // It seems to be better to map only one CUDA block per one multiprocessor or maybe
    // just slightly more. Therefore we omit blocksdPerMultiprocessor in the following.
    CudaReductionKernelLauncher( const Index size )
-   : activeDevice( Devices::CudaDeviceInfo::getActiveDevice() ),
-     blocksdPerMultiprocessor( Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice )
+   : activeDevice( Cuda::DeviceInfo::getActiveDevice() ),
+     blocksdPerMultiprocessor( Cuda::DeviceInfo::getRegistersPerMultiprocessor( activeDevice )
                                / ( Reduction_maxThreadsPerBlock * Reduction_registersPerThread ) ),
-     //desGridSize( blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ),
-     desGridSize( Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ),
+     //desGridSize( blocksdPerMultiprocessor * Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ) ),
+     desGridSize( Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ) ),
      originalSize( size )
    {
    }
@@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher
 
       // Copy result on CPU
       Result result;
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
+      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 );
       return result;
    }
 
@@ -384,8 +384,8 @@ struct CudaReductionKernelLauncher
       ////
       // Copy result on CPU
       std::pair< Index, Result > result;
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
+      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
+      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 );
       return result;
    }
 
@@ -402,7 +402,7 @@ struct CudaReductionKernelLauncher
 #ifdef HAVE_CUDA
          dim3 blockSize, gridSize;
          blockSize.x = Reduction_maxThreadsPerBlock;
-         gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
+         gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
 
          // when there is only one warp per blockSize.x, we need to allocate two warps
          // worth of shared memory so that we don't index shared memory out of bounds
@@ -473,6 +473,7 @@ struct CudaReductionKernelLauncher
             default:
                TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
          }
+         cudaStreamSynchronize(0);
          TNL_CHECK_CUDA_DEVICE;
 */
 
@@ -482,6 +483,8 @@ struct CudaReductionKernelLauncher
 
             CudaReductionKernel< Reduction_maxThreadsPerBlock >
             <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+            cudaStreamSynchronize(0);
+            TNL_CHECK_CUDA_DEVICE;
          }
          else {
             TNL_ASSERT( false, std::cerr << "Block size was expected to be " << Reduction_maxThreadsPerBlock << ", but " << blockSize.x << " was specified." << std::endl; );
@@ -507,7 +510,7 @@ struct CudaReductionKernelLauncher
 #ifdef HAVE_CUDA
          dim3 blockSize, gridSize;
          blockSize.x = Reduction_maxThreadsPerBlock;
-         gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
+         gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
 
          // when there is only one warp per blockSize.x, we need to allocate two warps
          // worth of shared memory so that we don't index shared memory out of bounds
@@ -578,6 +581,7 @@ struct CudaReductionKernelLauncher
             default:
                TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
          }
+         cudaStreamSynchronize(0);
          TNL_CHECK_CUDA_DEVICE;
 */
 
@@ -587,6 +591,8 @@ struct CudaReductionKernelLauncher
 
             CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock >
             <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+            cudaStreamSynchronize(0);
+            TNL_CHECK_CUDA_DEVICE;
          }
          else {
             TNL_ASSERT( false, std::cerr << "Block size was expected to be " << Reduction_maxThreadsPerBlock << ", but " << blockSize.x << " was specified." << std::endl; );
@@ -608,5 +614,4 @@ struct CudaReductionKernelLauncher
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h
similarity index 83%
rename from src/TNL/Containers/Algorithms/CudaScanKernel.h
rename to src/TNL/Algorithms/CudaScanKernel.h
index a8c3548757668df966b094a9da19e37b88ab7ed8..79a2019594922eee640672edb12d8ef6e9132dd0 100644
--- a/src/TNL/Containers/Algorithms/CudaScanKernel.h
+++ b/src/TNL/Algorithms/CudaScanKernel.h
@@ -13,12 +13,11 @@
 #include <iostream>
 
 #include <TNL/Math.h>
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/SharedMemory.h>
 #include <TNL/Exceptions/CudaBadAlloc.h>
 #include <TNL/Containers/Array.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 #ifdef HAVE_CUDA
@@ -36,8 +35,8 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
                          Real* output,
                          Real* auxArray )
 {
-   Real* sharedData = TNL::Devices::Cuda::getSharedMemory< Real >();
-   Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ];
+   Real* sharedData = TNL::Cuda::getSharedMemory< Real >();
+   Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Cuda::getNumberOfSharedMemoryBanks() + 2 ];
    Real* warpSums = &auxData[ blockDim.x ];
 
    const Index lastElementIdx = size - blockIdx.x * elementsInBlock;
@@ -54,7 +53,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
          sharedData[ 0 ] = zero;
       while( idx < elementsInBlock && blockOffset + idx < size )
       {
-         sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ];
+         sharedData[ Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ];
          idx += blockDim.x;
       }
    }
@@ -62,7 +61,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
    {
       while( idx < elementsInBlock && blockOffset + idx < size )
       {
-         sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ];
+         sharedData[ Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ];
          idx += blockDim.x;
       }
    }
@@ -78,33 +77,33 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
    if( chunkOffset < lastElementInBlock )
    {
       auxData[ threadIdx.x ] =
-         sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ];
+         sharedData[ Cuda::getInterleaving( chunkOffset ) ];
    }
 
    int chunkPointer = 1;
    while( chunkPointer < chunkSize &&
           chunkOffset + chunkPointer < lastElementInBlock )
    {
-      sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ] =
-         reduction( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ],
-                    sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] );
+      sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ] =
+         reduction( sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ],
+                    sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] );
       auxData[ threadIdx.x ] =
-         sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ];
+         sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ];
       chunkPointer++;
    }
 
    /***
     *  Perform the parallel prefix-sum inside warps.
     */
-   const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize();
-   const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize();
-   for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) {
+   const int threadInWarpIdx = threadIdx.x % Cuda::getWarpSize();
+   const int warpIdx = threadIdx.x / Cuda::getWarpSize();
+   for( int stride = 1; stride < Cuda::getWarpSize(); stride *= 2 ) {
       if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks )
          auxData[ threadIdx.x ] = reduction( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] );
       __syncwarp();
    }
 
-   if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 )
+   if( threadInWarpIdx == Cuda::getWarpSize() - 1 )
       warpSums[ warpIdx ] = auxData[ threadIdx.x ];
    __syncthreads();
 
@@ -112,7 +111,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
     * Compute prefix-sum of warp sums using one warp
     */
    if( warpIdx == 0 )
-      for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) {
+      for( int stride = 1; stride < Cuda::getWarpSize(); stride *= 2 ) {
          if( threadInWarpIdx >= stride )
             warpSums[ threadIdx.x ] = reduction( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] );
          __syncwarp();
@@ -136,9 +135,9 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
       Real chunkShift( zero );
       if( chunkIdx > 0 )
          chunkShift = auxData[ chunkIdx - 1 ];
-      sharedData[ Devices::Cuda::getInterleaving( idx ) ] =
-         reduction( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift );
-      output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ];
+      sharedData[ Cuda::getInterleaving( idx ) ] =
+         reduction( sharedData[ Cuda::getInterleaving( idx ) ], chunkShift );
+      output[ blockOffset + idx ] = sharedData[ Cuda::getInterleaving( idx ) ];
       idx += blockDim.x;
    }
    __syncthreads();
@@ -147,11 +146,11 @@ cudaFirstPhaseBlockScan( const ScanType scanType,
    {
       if( scanType == ScanType::Exclusive )
       {
-         auxArray[ blockIdx.x ] = reduction( sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ],
-                                             sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] );
+         auxArray[ blockIdx.x ] = reduction( sharedData[ Cuda::getInterleaving( lastElementInBlock - 1 ) ],
+                                             sharedData[ Cuda::getInterleaving( lastElementInBlock ) ] );
       }
       else
-         auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ];
+         auxArray[ blockIdx.x ] = sharedData[ Cuda::getInterleaving( lastElementInBlock - 1 ) ];
    }
 }
 
@@ -245,11 +244,11 @@ struct CudaScanKernelLauncher
       // compute the number of grids
       const int elementsInBlock = 8 * blockSize;
       const Index numberOfBlocks = roundUpDivision( size, elementsInBlock );
-      const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() );
+      const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() );
       //std::cerr << "numberOfgrids =  " << numberOfGrids << std::endl;
 
       // allocate array for the block sums
-      Array< Real, Devices::Cuda > blockSums;
+      Containers::Array< Real, Devices::Cuda > blockSums;
       blockSums.setSize( numberOfBlocks );
 
       // loop over all grids
@@ -268,8 +267,8 @@ struct CudaScanKernelLauncher
 
          // run the kernel
          const std::size_t sharedDataSize = elementsInBlock +
-                                            elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2;
-         const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( Real );
+                                            elementsInBlock / Cuda::getNumberOfSharedMemoryBanks() + 2;
+         const std::size_t sharedMemory = ( sharedDataSize + blockSize + Cuda::getWarpSize() ) * sizeof( Real );
          cudaFirstPhaseBlockScan<<< cudaGridSize, cudaBlockSize, sharedMemory >>>
             ( scanType,
               reduction,
@@ -330,7 +329,7 @@ struct CudaScanKernelLauncher
       // compute the number of grids
       const int elementsInBlock = 8 * blockSize;
       const Index numberOfBlocks = roundUpDivision( size, elementsInBlock );
-      const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() );
+      const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() );
 
       // loop over all grids
       for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) {
@@ -369,13 +368,13 @@ struct CudaScanKernelLauncher
     */
    static int& maxGridSize()
    {
-      static int maxGridSize = Devices::Cuda::getMaxGridSize();
+      static int maxGridSize = Cuda::getMaxGridSize();
       return maxGridSize;
    }
 
    static void resetMaxGridSize()
    {
-      maxGridSize() = Devices::Cuda::getMaxGridSize();
+      maxGridSize() = Cuda::getMaxGridSize();
    }
 
    static int& gridsCount()
@@ -388,5 +387,4 @@ struct CudaScanKernelLauncher
 #endif
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/DistributedScan.h b/src/TNL/Algorithms/DistributedScan.h
similarity index 92%
rename from src/TNL/Containers/Algorithms/DistributedScan.h
rename to src/TNL/Algorithms/DistributedScan.h
index 44fd425b92efcd5ab047111271e9f5b8a319e080..742acd5ed923b4d0e0cbf14e37be8fb40866ec06 100644
--- a/src/TNL/Containers/Algorithms/DistributedScan.h
+++ b/src/TNL/Algorithms/DistributedScan.h
@@ -12,11 +12,10 @@
 
 #pragma once
 
-#include <TNL/Containers/Algorithms/Scan.h>
+#include <TNL/Algorithms/Scan.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 template< ScanType Type >
@@ -51,11 +50,11 @@ struct DistributedScan
          const int nproc = CommunicatorType::GetSize( group );
          RealType dataForScatter[ nproc ];
          for( int i = 0; i < nproc; i++ ) dataForScatter[ i ] = localSum;
-         Vector< RealType, Devices::Host > rankSums( nproc );
+         Containers::Vector< RealType, Devices::Host > rankSums( nproc );
          // NOTE: exchanging general data types does not work with MPI
          CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group );
 
-         // compute prefix-sum of the per-rank sums
+         // compute the scan of the per-rank sums
          Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero );
 
          // perform second phase: shift by the per-block and per-rank offsets
@@ -66,5 +65,4 @@ struct DistributedScan
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Algorithms/MemoryOperations.h
similarity index 58%
rename from src/TNL/Containers/Algorithms/ArrayOperations.h
rename to src/TNL/Algorithms/MemoryOperations.h
index ca62f5b7ea45254298cb02d0ac909ee2242e72f2..59da324028c513853fdc6da81ba21d877bb98334 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperations.h
+++ b/src/TNL/Algorithms/MemoryOperations.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          ArrayOperations.h  -  description
+                          MemoryOperations.h  -  description
                              -------------------
     begin                : Jul 15, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -10,21 +10,19 @@
 
 #pragma once
 
+#include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
-template< typename DestinationDevice,
-          typename SourceDevice = DestinationDevice >
-struct ArrayOperations;
+template< typename DestinationDevice >
+struct MemoryOperations;
 
-// TODO: establish the concept of a "void device" for static computations in the whole TNL
 template<>
-struct ArrayOperations< void >
+struct MemoryOperations< Devices::Sequential >
 {
    template< typename Element >
    __cuda_callable__
@@ -49,37 +47,6 @@ struct ArrayOperations< void >
                      const SourceElement* source,
                      const Index size );
 
-   template< typename Element1,
-             typename Element2,
-             typename Index >
-   __cuda_callable__
-   static bool compare( const Element1* destination,
-                        const Element2* source,
-                        const Index size );
-};
-
-template<>
-struct ArrayOperations< Devices::Host >
-{
-   template< typename Element >
-   static void setElement( Element* data,
-                           const Element& value );
-
-   template< typename Element >
-   static Element getElement( const Element* data );
-
-   template< typename Element, typename Index >
-   static void set( Element* data,
-                    const Element& value,
-                    const Index size );
-
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
    template< typename DestinationElement,
              typename Index,
              typename SourceIterator >
@@ -91,25 +58,28 @@ struct ArrayOperations< Devices::Host >
    template< typename Element1,
              typename Element2,
              typename Index >
+   __cuda_callable__
    static bool compare( const Element1* destination,
                         const Element2* source,
                         const Index size );
 
    template< typename Element,
              typename Index >
+   __cuda_callable__
    static bool containsValue( const Element* data,
                               const Index size,
                               const Element& value );
 
    template< typename Element,
              typename Index >
+   __cuda_callable__
    static bool containsOnlyValue( const Element* data,
                                   const Index size,
                                   const Element& value );
 };
 
 template<>
-struct ArrayOperations< Devices::Cuda >
+struct MemoryOperations< Devices::Host >
 {
    template< typename Element >
    static void setElement( Element* data,
@@ -159,44 +129,7 @@ struct ArrayOperations< Devices::Cuda >
 };
 
 template<>
-struct ArrayOperations< Devices::Cuda, Devices::Host >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static bool compare( const DestinationElement* destination,
-                        const SourceElement* source,
-                        const Index size );
-};
-
-template<>
-struct ArrayOperations< Devices::Host, Devices::Cuda >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename Element1,
-             typename Element2,
-             typename Index >
-   static bool compare( const Element1* destination,
-                        const Element2* source,
-                        const Index size );
-};
-
-
-template<>
-struct ArrayOperations< Devices::MIC >
+struct MemoryOperations< Devices::Cuda >
 {
    template< typename Element >
    static void setElement( Element* data,
@@ -245,49 +178,9 @@ struct ArrayOperations< Devices::MIC >
                                   const Element& value );
 };
 
-template<>
-struct ArrayOperations< Devices::MIC, Devices::Host >
-{
-   public:
-
-      template< typename DestinationElement,
-                typename SourceElement,
-                typename Index >
-      static void copy( DestinationElement* destination,
-                        const SourceElement* source,
-                        const Index size );
-
-      template< typename DestinationElement,
-                typename SourceElement,
-                typename Index >
-      static bool compare( const DestinationElement* destination,
-                           const SourceElement* source,
-                           const Index size );
-};
-
-template<>
-struct ArrayOperations< Devices::Host, Devices::MIC >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static bool compare( const DestinationElement* destination,
-                        const SourceElement* source,
-                        const Index size );
-};
-
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
 
-#include <TNL/Containers/Algorithms/ArrayOperationsStatic.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsHost.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsCuda.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsMIC.hpp>
+#include <TNL/Algorithms/MemoryOperationsSequential.hpp>
+#include <TNL/Algorithms/MemoryOperationsHost.hpp>
+#include <TNL/Algorithms/MemoryOperationsCuda.hpp>
diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea4b92b61ba5d52fdc6ea98f656d25a97db02ab9
--- /dev/null
+++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp
@@ -0,0 +1,159 @@
+/***************************************************************************
+                          MemoryOperationsCuda.hpp  -  description
+                             -------------------
+    begin                : Jul 16, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <iostream>
+#include <memory>  // std::unique_ptr
+#include <stdexcept>
+
+#include <TNL/Algorithms/MemoryOperations.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+
+namespace TNL {
+namespace Algorithms {
+
+template< typename Element >
+void
+MemoryOperations< Devices::Cuda >::
+setElement( Element* data,
+            const Element& value )
+{
+   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
+   MemoryOperations< Devices::Cuda >::set( data, value, 1 );
+}
+
+template< typename Element >
+Element
+MemoryOperations< Devices::Cuda >::
+getElement( const Element* data )
+{
+   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
+   Element result;
+   MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
+   return result;
+}
+
+template< typename Element, typename Index >
+void
+MemoryOperations< Devices::Cuda >::
+set( Element* data,
+     const Element& value,
+     const Index size )
+{
+   if( size == 0 ) return;
+   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
+   auto kernel = [data, value] __cuda_callable__ ( Index i )
+   {
+      data[ i ] = value;
+   };
+   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
+}
+
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+void
+MemoryOperations< Devices::Cuda >::
+copy( DestinationElement* destination,
+      const SourceElement* source,
+      const Index size )
+{
+   if( size == 0 ) return;
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+
+   // our ParallelFor kernel is faster than cudaMemcpy
+   auto kernel = [destination, source] __cuda_callable__ ( Index i )
+   {
+      destination[ i ] = source[ i ];
+   };
+   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
+}
+
+template< typename DestinationElement,
+          typename Index,
+          typename SourceIterator >
+void
+MemoryOperations< Devices::Cuda >::
+copyFromIterator( DestinationElement* destination,
+                  Index destinationSize,
+                  SourceIterator first,
+                  SourceIterator last )
+{
+   using BaseType = typename std::remove_cv< DestinationElement >::type;
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize );
+   std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
+   Index copiedElements = 0;
+   while( copiedElements < destinationSize && first != last ) {
+      Index i = 0;
+      while( i < buffer_size && first != last )
+         buffer[ i++ ] = *first++;
+      MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
+      copiedElements += i;
+   }
+   if( first != last )
+      throw std::length_error( "Source iterator is larger than the destination array." );
+}
+
+template< typename Element1,
+          typename Element2,
+          typename Index >
+bool
+MemoryOperations< Devices::Cuda >::
+compare( const Element1* destination,
+         const Element2* source,
+         const Index size )
+{
+   if( size == 0 ) return true;
+   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
+
+   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
+   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
+}
+
+template< typename Element,
+          typename Index >
+bool
+MemoryOperations< Devices::Cuda >::
+containsValue( const Element* data,
+               const Index size,
+               const Element& value )
+{
+   if( size == 0 ) return false;
+   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
+   TNL_ASSERT_GE( size, (Index) 0, "" );
+
+   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
+   return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false );
+}
+
+template< typename Element,
+          typename Index >
+bool
+MemoryOperations< Devices::Cuda >::
+containsOnlyValue( const Element* data,
+                   const Index size,
+                   const Element& value )
+{
+   if( size == 0 ) return false;
+   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
+   TNL_ASSERT_GE( size, 0, "" );
+
+   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
+   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
+}
+
+} // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp
similarity index 55%
rename from src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp
rename to src/TNL/Algorithms/MemoryOperationsHost.hpp
index 3351444141e2eb3584d2f582e1be7026fc34532e..cc85975f55700c5da73fc6bca509fffc75b0e7bb 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp
+++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          ArrayOperationsHost.hpp  -  description
+                          MemoryOperationsHost.hpp  -  description
                              -------------------
     begin                : Jul 16, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -12,19 +12,18 @@
 
 #include <type_traits>
 #include <stdexcept>
-#include <string.h>
+#include <algorithm>  // std::copy, std::equal
 
-#include <TNL/ParallelFor.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/MemoryOperations.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Reduction.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 template< typename Element >
 void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 setElement( Element* data,
             const Element& value )
 {
@@ -34,7 +33,7 @@ setElement( Element* data,
 
 template< typename Element >
 Element
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 getElement( const Element* data )
 {
    TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
@@ -43,7 +42,7 @@ getElement( const Element* data )
 
 template< typename Element, typename Index >
 void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 set( Element* data,
      const Element& value,
      const Index size )
@@ -61,60 +60,47 @@ template< typename DestinationElement,
           typename SourceElement,
           typename Index >
 void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 copy( DestinationElement* destination,
       const SourceElement* source,
       const Index size )
 {
    if( size == 0 ) return;
-   if( std::is_same< DestinationElement, SourceElement >::value &&
-       ( std::is_fundamental< DestinationElement >::value ||
-         std::is_pointer< DestinationElement >::value ) )
-   {
-      // GCC 8.1 complains that we bypass a non-trivial copy-constructor
-      // (in C++17 we could use constexpr if to avoid compiling this branch in that case)
-      #if defined(__GNUC__) && ( __GNUC__ > 8 || ( __GNUC__ == 8 && __GNUC_MINOR__ > 0 ) ) && !defined(__clang__)
-         #pragma GCC diagnostic push
-         #pragma GCC diagnostic ignored "-Wclass-memaccess"
-      #endif
-      memcpy( destination, source, size * sizeof( DestinationElement ) );
-      #if defined(__GNUC__) && !defined(__clang__) && !defined(__NVCC__)
-         #pragma GCC diagnostic pop
-      #endif
-   }
-   else
-   {
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+
+   // our ParallelFor version is faster than std::copy iff we use more than 1 thread
+   if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) {
       auto kernel = [destination, source]( Index i )
       {
          destination[ i ] = source[ i ];
       };
       ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel );
    }
+   else {
+      // std::copy usually uses std::memcpy for TriviallyCopyable types
+      std::copy( source, source + size, destination );
+   }
 }
 
 template< typename DestinationElement,
           typename Index,
           typename SourceIterator >
 void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 copyFromIterator( DestinationElement* destination,
                   Index destinationSize,
                   SourceIterator first,
                   SourceIterator last )
 {
-   Index i = 0;
-   while( i < destinationSize && first != last )
-      destination[ i++ ] = *first++;
-   if( first != last )
-      throw std::length_error( "Source iterator is larger than the destination array." );
+   MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last );
 }
 
-
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
 bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 compare( const DestinationElement* destination,
          const SourceElement* source,
          const Index size )
@@ -122,24 +108,21 @@ compare( const DestinationElement* destination,
    if( size == 0 ) return true;
    TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
-   if( std::is_same< DestinationElement, SourceElement >::value &&
-       ( std::is_fundamental< DestinationElement >::value ||
-         std::is_pointer< DestinationElement >::value ) )
-   {
-      if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 )
-         return false;
+
+   if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) {
+      auto fetch = [destination, source] ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
+      return Reduction< Devices::Host >::reduce( size, std::logical_and<>{}, fetch, true );
+   }
+   else {
+      // sequential algorithm can return as soon as it finds a mismatch
+      return std::equal( source, source + size, destination );
    }
-   else
-      for( Index i = 0; i < size; i++ )
-         if( ! ( destination[ i ] == source[ i ] ) )
-            return false;
-   return true;
 }
 
 template< typename Element,
           typename Index >
 bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 containsValue( const Element* data,
                const Index size,
                const Element& value )
@@ -148,16 +131,20 @@ containsValue( const Element* data,
    TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
    TNL_ASSERT_GE( size, 0, "" );
 
-   for( Index i = 0; i < size; i++ )
-      if( data[ i ] == value )
-         return true;
-   return false;
+   if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) {
+      auto fetch = [=] ( Index i ) -> bool { return data[ i ] == value; };
+      return Reduction< Devices::Host >::reduce( size, std::logical_or<>{}, fetch, false );
+   }
+   else {
+      // sequential algorithm can return as soon as it finds a match
+      return MemoryOperations< Devices::Sequential >::containsValue( data, size, value );
+   }
 }
 
 template< typename Element,
           typename Index >
 bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
 containsOnlyValue( const Element* data,
                    const Index size,
                    const Element& value )
@@ -166,12 +153,15 @@ containsOnlyValue( const Element* data,
    TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
    TNL_ASSERT_GE( size, 0, "" );
 
-   for( Index i = 0; i < size; i++ )
-      if( ! ( data[ i ] == value ) )
-         return false;
-   return true;
+   if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) {
+      auto fetch = [data, value] ( Index i ) -> bool { return data[ i ] == value; };
+      return Reduction< Devices::Host >::reduce( size, std::logical_and<>{}, fetch, true );
+   }
+   else {
+      // sequential algorithm can return as soon as it finds a mismatch
+      return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value );
+   }
 }
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Algorithms/MemoryOperationsSequential.hpp b/src/TNL/Algorithms/MemoryOperationsSequential.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e5ad25b1392ccd093952b0dbb25b941370eb833
--- /dev/null
+++ b/src/TNL/Algorithms/MemoryOperationsSequential.hpp
@@ -0,0 +1,135 @@
+/***************************************************************************
+                          MemoryOperationsSequential.hpp  -  description
+                             -------------------
+    begin                : Apr 8, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Algorithms/MemoryOperations.h>
+
+namespace TNL {
+namespace Algorithms {
+
+template< typename Element >
+__cuda_callable__
+void
+MemoryOperations< Devices::Sequential >::
+setElement( Element* data,
+            const Element& value )
+{
+   *data = value;
+}
+
+template< typename Element >
+__cuda_callable__
+Element
+MemoryOperations< Devices::Sequential >::
+getElement( const Element* data )
+{
+   return *data;
+}
+
+template< typename Element, typename Index >
+__cuda_callable__
+void
+MemoryOperations< Devices::Sequential >::
+set( Element* data,
+     const Element& value,
+     const Index size )
+{
+   for( Index i = 0; i < size; i ++ )
+      data[ i ] = value;
+}
+
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+__cuda_callable__
+void
+MemoryOperations< Devices::Sequential >::
+copy( DestinationElement* destination,
+      const SourceElement* source,
+      const Index size )
+{
+   for( Index i = 0; i < size; i ++ )
+      destination[ i ] = source[ i ];
+}
+
+template< typename DestinationElement,
+          typename Index,
+          typename SourceIterator >
+void
+MemoryOperations< Devices::Sequential >::
+copyFromIterator( DestinationElement* destination,
+                  Index destinationSize,
+                  SourceIterator first,
+                  SourceIterator last )
+{
+   Index i = 0;
+   while( i < destinationSize && first != last )
+      destination[ i++ ] = *first++;
+   if( first != last )
+      throw std::length_error( "Source iterator is larger than the destination array." );
+}
+
+template< typename Element1,
+          typename Element2,
+          typename Index >
+__cuda_callable__
+bool
+MemoryOperations< Devices::Sequential >::
+compare( const Element1* destination,
+         const Element2* source,
+         const Index size )
+{
+   for( Index i = 0; i < size; i++ )
+      if( ! ( destination[ i ] == source[ i ] ) )
+         return false;
+   return true;
+}
+
+template< typename Element,
+          typename Index >
+__cuda_callable__
+bool
+MemoryOperations< Devices::Sequential >::
+containsValue( const Element* data,
+               const Index size,
+               const Element& value )
+{
+   if( size == 0 ) return false;
+   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
+   TNL_ASSERT_GE( size, 0, "" );
+
+   for( Index i = 0; i < size; i++ )
+      if( data[ i ] == value )
+         return true;
+   return false;
+}
+
+template< typename Element,
+          typename Index >
+__cuda_callable__
+bool
+MemoryOperations< Devices::Sequential >::
+containsOnlyValue( const Element* data,
+                   const Index size,
+                   const Element& value )
+{
+   if( size == 0 ) return false;
+   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
+   TNL_ASSERT_GE( size, 0, "" );
+
+   for( Index i = 0; i < size; i++ )
+      if( ! ( data[ i ] == value ) )
+         return false;
+   return true;
+}
+
+} // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
new file mode 100644
index 0000000000000000000000000000000000000000..48e5ad64750c5dc8b7a84a9b4346b345e6ff3f1a
--- /dev/null
+++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
@@ -0,0 +1,278 @@
+/***************************************************************************
+                          MultiDeviceMemoryOperations.h  -  description
+                             -------------------
+    begin                : Aug 18, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Algorithms/MemoryOperations.h>
+
+namespace TNL {
+namespace Algorithms {
+
+template< typename DestinationDevice,
+          typename SourceDevice = DestinationDevice >
+struct MultiDeviceMemoryOperations
+{
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static void copy( DestinationElement* destination,
+                     const SourceElement* source,
+                     const Index size )
+   {
+      // use DestinationDevice, unless it is void
+      using Device = std::conditional_t< std::is_void< DestinationDevice >::value, SourceDevice, DestinationDevice >;
+      MemoryOperations< Device >::copy( destination, source, size );
+   }
+
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool compare( const DestinationElement* destination,
+                        const SourceElement* source,
+                        const Index size )
+   {
+      // use DestinationDevice, unless it is void
+      using Device = std::conditional_t< std::is_void< DestinationDevice >::value, SourceDevice, DestinationDevice >;
+      return MemoryOperations< Device >::compare( destination, source, size );
+   }
+};
+
+
+template< typename DeviceType >
+struct MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >
+{
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static void copy( DestinationElement* destination,
+                     const SourceElement* source,
+                     const Index size );
+
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool compare( const DestinationElement* destination,
+                        const SourceElement* source,
+                        const Index size );
+};
+
+template< typename DeviceType >
+struct MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >
+{
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static void copy( DestinationElement* destination,
+                     const SourceElement* source,
+                     const Index size );
+
+   template< typename Element1,
+             typename Element2,
+             typename Index >
+   static bool compare( const Element1* destination,
+                        const Element2* source,
+                        const Index size );
+};
+
+
+// full specialization for CUDA <-> CUDA, to disambiguate from the two partial specializations involving Devices::Cuda
+template<>
+struct MultiDeviceMemoryOperations< Devices::Cuda, Devices::Cuda >
+{
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static void copy( DestinationElement* destination,
+                     const SourceElement* source,
+                     const Index size )
+   {
+      MemoryOperations< Devices::Cuda >::copy( destination, source, size );
+   }
+
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool compare( const DestinationElement* destination,
+                        const SourceElement* source,
+                        const Index size )
+   {
+      return MemoryOperations< Devices::Cuda >::compare( destination, source, size );
+   }
+};
+
+
+/****
+ * Operations CUDA -> Host
+ */
+template< typename DeviceType >
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+void
+MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >::
+copy( DestinationElement* destination,
+      const SourceElement* source,
+      const Index size )
+{
+   if( size == 0 ) return;
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+#ifdef HAVE_CUDA
+   if( std::is_same< DestinationElement, SourceElement >::value )
+   {
+      if( cudaMemcpy( destination,
+                      source,
+                      size * sizeof( DestinationElement ),
+                      cudaMemcpyDeviceToHost ) != cudaSuccess )
+         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
+      TNL_CHECK_CUDA_DEVICE;
+   }
+   else
+   {
+      using BaseType = typename std::remove_cv< SourceElement >::type;
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size );
+      std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
+      Index i = 0;
+      while( i < size )
+      {
+         if( cudaMemcpy( (void*) buffer.get(),
+                         (void*) &source[ i ],
+                         TNL::min( size - i, buffer_size ) * sizeof(SourceElement),
+                         cudaMemcpyDeviceToHost ) != cudaSuccess )
+            std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
+         TNL_CHECK_CUDA_DEVICE;
+         int j = 0;
+         while( j < buffer_size && i + j < size )
+         {
+            destination[ i + j ] = buffer[ j ];
+            j++;
+         }
+         i += j;
+      }
+   }
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+
+template< typename DeviceType >
+   template< typename Element1,
+             typename Element2,
+             typename Index >
+bool
+MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >::
+compare( const Element1* destination,
+         const Element2* source,
+         const Index size )
+{
+   if( size == 0 ) return true;
+   /***
+    * Here, destination is on host and source is on CUDA device.
+    */
+   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
+#ifdef HAVE_CUDA
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(Element2), size );
+   std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] };
+   Index compared = 0;
+   while( compared < size )
+   {
+      const int transfer = TNL::min( size - compared, buffer_size );
+      if( cudaMemcpy( (void*) host_buffer.get(),
+                      (void*) &source[ compared ],
+                      transfer * sizeof(Element2),
+                      cudaMemcpyDeviceToHost ) != cudaSuccess )
+         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
+      TNL_CHECK_CUDA_DEVICE;
+      if( ! MemoryOperations< Devices::Host >::compare( &destination[ compared ], host_buffer.get(), transfer ) )
+         return false;
+      compared += transfer;
+   }
+   return true;
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+/****
+ * Operations Host -> CUDA
+ */
+template< typename DeviceType >
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+void
+MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >::
+copy( DestinationElement* destination,
+      const SourceElement* source,
+      const Index size )
+{
+   if( size == 0 ) return;
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
+#ifdef HAVE_CUDA
+   if( std::is_same< DestinationElement, SourceElement >::value )
+   {
+      if( cudaMemcpy( destination,
+                      source,
+                      size * sizeof( DestinationElement ),
+                      cudaMemcpyHostToDevice ) != cudaSuccess )
+         std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
+      TNL_CHECK_CUDA_DEVICE;
+   }
+   else
+   {
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(DestinationElement), size );
+      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] };
+      Index i = 0;
+      while( i < size )
+      {
+         int j = 0;
+         while( j < buffer_size && i + j < size )
+         {
+            buffer[ j ] = source[ i + j ];
+            j++;
+         }
+         if( cudaMemcpy( (void*) &destination[ i ],
+                         (void*) buffer.get(),
+                         j * sizeof( DestinationElement ),
+                         cudaMemcpyHostToDevice ) != cudaSuccess )
+            std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
+         TNL_CHECK_CUDA_DEVICE;
+         i += j;
+      }
+   }
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+template< typename DeviceType >
+   template< typename Element1,
+             typename Element2,
+             typename Index >
+bool
+MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >::
+compare( const Element1* hostData,
+         const Element2* deviceData,
+         const Index size )
+{
+   if( size == 0 ) return true;
+   TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
+   return MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >::compare( deviceData, hostData, size );
+}
+
+} // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/Multireduction.h b/src/TNL/Algorithms/Multireduction.h
similarity index 73%
rename from src/TNL/Containers/Algorithms/Multireduction.h
rename to src/TNL/Algorithms/Multireduction.h
index 9802a295356db6e53c7f9c3d809c3dc9c70b38c8..8e63fa7eabce3e8d9d837770794d991fd12705e7 100644
--- a/src/TNL/Containers/Algorithms/Multireduction.h
+++ b/src/TNL/Algorithms/Multireduction.h
@@ -14,16 +14,45 @@
 
 #include <functional>  // reduction functions like std::plus, std::logical_and, std::logical_or etc.
 
+#include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 template< typename Device >
 struct Multireduction;
 
+template<>
+struct Multireduction< Devices::Sequential >
+{
+   /**
+    * Parameters:
+    *    zero: starting value for reduction
+    *    dataFetcher: callable object such that `dataFetcher( i, j )` yields
+    *                 the i-th value to be reduced from the j-th dataset
+    *                 (i = 0,...,size-1; j = 0,...,n-1)
+    *    reduction: callable object representing the reduction operation
+    *               for example, it can be an instance of std::plus, std::logical_and,
+    *               std::logical_or etc.
+    *    size: the size of each dataset
+    *    n: number of datasets to be reduced
+    *    result: output array of size = n
+    */
+   template< typename Result,
+             typename DataFetcher,
+             typename Reduction,
+             typename Index >
+   static constexpr void
+   reduce( const Result zero,
+           DataFetcher dataFetcher,
+           const Reduction reduction,
+           const Index size,
+           const int n,
+           Result* result );
+};
+
 template<>
 struct Multireduction< Devices::Host >
 {
@@ -83,7 +112,6 @@ struct Multireduction< Devices::Cuda >
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
 
 #include "Multireduction.hpp"
diff --git a/src/TNL/Containers/Algorithms/Multireduction.hpp b/src/TNL/Algorithms/Multireduction.hpp
similarity index 65%
rename from src/TNL/Containers/Algorithms/Multireduction.hpp
rename to src/TNL/Algorithms/Multireduction.hpp
index 8c74ee9ac479a12c30c0e2b49df787c5bd2c277d..0bfead2871a5d216522845680c02912cdcd1d8b6 100644
--- a/src/TNL/Containers/Algorithms/Multireduction.hpp
+++ b/src/TNL/Algorithms/Multireduction.hpp
@@ -17,9 +17,9 @@
 //#define CUDA_REDUCTION_PROFILING
 
 #include <TNL/Assert.h>
-#include <TNL/Containers/Algorithms/Multireduction.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/CudaMultireductionKernel.h>
+#include <TNL/Algorithms/Multireduction.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
+#include <TNL/Algorithms/CudaMultireductionKernel.h>
 
 #ifdef CUDA_REDUCTION_PROFILING
 #include <TNL/Timer.h>
@@ -27,9 +27,85 @@
 #endif
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
+template< typename Result,
+          typename DataFetcher,
+          typename Reduction,
+          typename Index >
+void constexpr
+Multireduction< Devices::Sequential >::
+reduce( const Result zero,
+        DataFetcher dataFetcher,
+        const Reduction reduction,
+        const Index size,
+        const int n,
+        Result* result )
+{
+   TNL_ASSERT_GT( size, 0, "The size of datasets must be positive." );
+   TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." );
+
+   constexpr int block_size = 128;
+   const int blocks = size / block_size;
+
+   if( blocks > 1 ) {
+      // initialize array for unrolled results
+      // (it is accessed as a row-major matrix with n rows and 4 columns)
+      Result r[ n * 4 ];
+      for( int k = 0; k < n * 4; k++ )
+         r[ k ] = zero;
+
+      // main reduction (explicitly unrolled loop)
+      for( int b = 0; b < blocks; b++ ) {
+         const Index offset = b * block_size;
+         for( int k = 0; k < n; k++ ) {
+            Result* _r = r + 4 * k;
+            for( int i = 0; i < block_size; i += 4 ) {
+               _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( offset + i,     k ) );
+               _r[ 1 ] = reduction( _r[ 1 ], dataFetcher( offset + i + 1, k ) );
+               _r[ 2 ] = reduction( _r[ 2 ], dataFetcher( offset + i + 2, k ) );
+               _r[ 3 ] = reduction( _r[ 3 ], dataFetcher( offset + i + 3, k ) );
+            }
+         }
+      }
+
+      // reduction of the last, incomplete block (not unrolled)
+      for( int k = 0; k < n; k++ ) {
+         Result* _r = r + 4 * k;
+         for( Index i = blocks * block_size; i < size; i++ )
+            _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( i, k ) );
+      }
+
+      // reduction of unrolled results
+      for( int k = 0; k < n; k++ ) {
+         Result* _r = r + 4 * k;
+         _r[ 0 ] = reduction( _r[ 0 ], _r[ 1 ] );
+         _r[ 0 ] = reduction( _r[ 0 ], _r[ 2 ] );
+         _r[ 0 ] = reduction( _r[ 0 ], _r[ 3 ] );
+
+         // copy the result into the output parameter
+         result[ k ] = _r[ 0 ];
+      }
+   }
+   else {
+      for( int k = 0; k < n; k++ )
+         result[ k ] = zero;
+
+      for( int b = 0; b < blocks; b++ ) {
+         const Index offset = b * block_size;
+         for( int k = 0; k < n; k++ ) {
+            for( int i = 0; i < block_size; i++ )
+               result[ k ] = reduction( result[ k ], dataFetcher( offset + i, k ) );
+         }
+      }
+
+      for( int k = 0; k < n; k++ ) {
+         for( Index i = blocks * block_size; i < size; i++ )
+            result[ k ] = reduction( result[ k ], dataFetcher( i, k ) );
+      }
+   }
+}
+
 template< typename Result,
           typename DataFetcher,
           typename Reduction,
@@ -46,10 +122,10 @@ reduce( const Result zero,
    TNL_ASSERT_GT( size, 0, "The size of datasets must be positive." );
    TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." );
 
+#ifdef HAVE_OPENMP
    constexpr int block_size = 128;
    const int blocks = size / block_size;
 
-#ifdef HAVE_OPENMP
    if( Devices::Host::isOMPEnabled() && blocks >= 2 ) {
       const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() );
 #pragma omp parallel num_threads(threads)
@@ -107,67 +183,9 @@ reduce( const Result zero,
          }
       }
    }
-   else {
-#endif
-      if( blocks > 1 ) {
-         // initialize array for unrolled results
-         // (it is accessed as a row-major matrix with n rows and 4 columns)
-         Result r[ n * 4 ];
-         for( int k = 0; k < n * 4; k++ )
-            r[ k ] = zero;
-
-         // main reduction (explicitly unrolled loop)
-         for( int b = 0; b < blocks; b++ ) {
-            const Index offset = b * block_size;
-            for( int k = 0; k < n; k++ ) {
-               Result* _r = r + 4 * k;
-               for( int i = 0; i < block_size; i += 4 ) {
-                  _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( offset + i,     k ) );
-                  _r[ 1 ] = reduction( _r[ 1 ], dataFetcher( offset + i + 1, k ) );
-                  _r[ 2 ] = reduction( _r[ 2 ], dataFetcher( offset + i + 2, k ) );
-                  _r[ 3 ] = reduction( _r[ 3 ], dataFetcher( offset + i + 3, k ) );
-               }
-            }
-         }
-
-         // reduction of the last, incomplete block (not unrolled)
-         for( int k = 0; k < n; k++ ) {
-            Result* _r = r + 4 * k;
-            for( Index i = blocks * block_size; i < size; i++ )
-               _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( i, k ) );
-         }
-
-         // reduction of unrolled results
-         for( int k = 0; k < n; k++ ) {
-            Result* _r = r + 4 * k;
-            _r[ 0 ] = reduction( _r[ 0 ], _r[ 1 ] );
-            _r[ 0 ] = reduction( _r[ 0 ], _r[ 2 ] );
-            _r[ 0 ] = reduction( _r[ 0 ], _r[ 3 ] );
-
-            // copy the result into the output parameter
-            result[ k ] = _r[ 0 ];
-         }
-      }
-      else {
-         for( int k = 0; k < n; k++ )
-            result[ k ] = zero;
-
-         for( int b = 0; b < blocks; b++ ) {
-            const Index offset = b * block_size;
-            for( int k = 0; k < n; k++ ) {
-               for( int i = 0; i < block_size; i++ )
-                  result[ k ] = reduction( result[ k ], dataFetcher( offset + i, k ) );
-            }
-         }
-
-         for( int k = 0; k < n; k++ ) {
-            for( Index i = blocks * block_size; i < size; i++ )
-               result[ k ] = reduction( result[ k ], dataFetcher( i, k ) );
-         }
-      }
-#ifdef HAVE_OPENMP
-   }
+   else
 #endif
+      Multireduction< Devices::Sequential >::reduce( zero, dataFetcher, reduction, size, n, result );
 }
 
 template< typename Result,
@@ -205,7 +223,7 @@ reduce( const Result zero,
 
    // transfer the reduced data from device to host
    std::unique_ptr< Result[] > resultArray{ new Result[ n * reducedSize ] };
-   ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize );
+   MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize );
 
    #ifdef CUDA_REDUCTION_PROFILING
       timer.stop();
@@ -216,7 +234,7 @@ reduce( const Result zero,
 
    // finish the reduction on the host
    auto dataFetcherFinish = [&] ( int i, int k ) { return resultArray[ i + k * reducedSize ]; };
-   Multireduction< Devices::Host >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult );
+   Multireduction< Devices::Sequential >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult );
 
    #ifdef CUDA_REDUCTION_PROFILING
       timer.stop();
@@ -225,5 +243,4 @@ reduce( const Result zero,
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/ParallelFor.h b/src/TNL/Algorithms/ParallelFor.h
similarity index 79%
rename from src/TNL/ParallelFor.h
rename to src/TNL/Algorithms/ParallelFor.h
index 04af2740807b9139ca0b8452b9c1b7bc52f5a8c8..6d5e917ba4ac07246322a82c7d5edec38a1cb02b 100644
--- a/src/TNL/ParallelFor.h
+++ b/src/TNL/Algorithms/ParallelFor.h
@@ -10,9 +10,13 @@
 
 #pragma once
 
+#include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
+#include <TNL/Cuda/CheckDevice.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Math.h>
 
 /****
@@ -27,12 +31,57 @@
  */
 
 namespace TNL {
+namespace Algorithms {
 
 enum ParallelForMode { SynchronousMode, AsynchronousMode };
-   
-template< typename Device = Devices::Host,
+
+template< typename Device = Devices::Sequential,
           ParallelForMode Mode = SynchronousMode >
 struct ParallelFor
+{
+   template< typename Index,
+             typename Function,
+             typename... FunctionArgs >
+   static void exec( Index start, Index end, Function f, FunctionArgs... args )
+   {
+      for( Index i = start; i < end; i++ )
+         f( i, args... );
+   }
+};
+
+template< typename Device = Devices::Sequential,
+          ParallelForMode Mode = SynchronousMode >
+struct ParallelFor2D
+{
+   template< typename Index,
+             typename Function,
+             typename... FunctionArgs >
+   static void exec( Index startX, Index startY, Index endX, Index endY, Function f, FunctionArgs... args )
+   {
+      for( Index j = startY; j < endY; j++ )
+      for( Index i = startX; i < endX; i++ )
+         f( i, j, args... );
+   }
+};
+
+template< typename Device = Devices::Sequential,
+          ParallelForMode Mode = SynchronousMode >
+struct ParallelFor3D
+{
+   template< typename Index,
+             typename Function,
+             typename... FunctionArgs >
+   static void exec( Index startX, Index startY, Index startZ, Index endX, Index endY, Index endZ, Function f, FunctionArgs... args )
+   {
+      for( Index k = startZ; k < endZ; k++ )
+      for( Index j = startY; j < endY; j++ )
+      for( Index i = startX; i < endX; i++ )
+         f( i, j, k, args... );
+   }
+};
+
+template< ParallelForMode Mode >
+struct ParallelFor< Devices::Host, Mode >
 {
    template< typename Index,
              typename Function,
@@ -41,26 +90,23 @@ struct ParallelFor
    {
 #ifdef HAVE_OPENMP
       // Benchmarks show that this is significantly faster compared
-      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )'
-      if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
+      // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() && end - start > 512 )'
+      if( Devices::Host::isOMPEnabled() && end - start > 512 )
       {
-#pragma omp parallel for
+         #pragma omp parallel for
          for( Index i = start; i < end; i++ )
             f( i, args... );
       }
       else
-         for( Index i = start; i < end; i++ )
-            f( i, args... );
+         ParallelFor< Devices::Sequential >::exec( start, end, f, args... );
 #else
-      for( Index i = start; i < end; i++ )
-         f( i, args... );
+      ParallelFor< Devices::Sequential >::exec( start, end, f, args... );
 #endif
    }
 };
 
-template< typename Device = Devices::Host,
-          ParallelForMode Mode = SynchronousMode >
-struct ParallelFor2D
+template< ParallelForMode Mode >
+struct ParallelFor2D< Devices::Host, Mode >
 {
    template< typename Index,
              typename Function,
@@ -69,30 +115,24 @@ struct ParallelFor2D
    {
 #ifdef HAVE_OPENMP
       // Benchmarks show that this is significantly faster compared
-      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
-      if( TNL::Devices::Host::isOMPEnabled() )
+      // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() )'
+      if( Devices::Host::isOMPEnabled() )
       {
-#pragma omp parallel for
-         for( Index j = startY; j < endY; j++ )
-         for( Index i = startX; i < endX; i++ )
-            f( i, j, args... );
-      }
-      else {
+         #pragma omp parallel for
          for( Index j = startY; j < endY; j++ )
          for( Index i = startX; i < endX; i++ )
             f( i, j, args... );
       }
+      else
+         ParallelFor2D< Devices::Sequential >::exec( startX, startY, endX, endY, f, args... );
 #else
-      for( Index j = startY; j < endY; j++ )
-      for( Index i = startX; i < endX; i++ )
-         f( i, j, args... );
+      ParallelFor2D< Devices::Sequential >::exec( startX, startY, endX, endY, f, args... );
 #endif
    }
 };
 
-template< typename Device = Devices::Host,
-          ParallelForMode Mode = SynchronousMode >
-struct ParallelFor3D
+template< ParallelForMode Mode >
+struct ParallelFor3D< Devices::Host, Mode >
 {
    template< typename Index,
              typename Function,
@@ -101,27 +141,19 @@ struct ParallelFor3D
    {
 #ifdef HAVE_OPENMP
       // Benchmarks show that this is significantly faster compared
-      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
-     if( TNL::Devices::Host::isOMPEnabled() )
-     {
-#pragma omp parallel for collapse(2)
-      for( Index k = startZ; k < endZ; k++ )
-      for( Index j = startY; j < endY; j++ )
-      for( Index i = startX; i < endX; i++ )
-         f( i, j, k, args... );
-      }
-      else
+      // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() )'
+      if( Devices::Host::isOMPEnabled() )
       {
+         #pragma omp parallel for collapse(2)
          for( Index k = startZ; k < endZ; k++ )
          for( Index j = startY; j < endY; j++ )
          for( Index i = startX; i < endX; i++ )
             f( i, j, k, args... );
       }
+      else
+         ParallelFor3D< Devices::Sequential >::exec( startX, startY, startZ, endX, endY, endZ, f, args... );
 #else
-      for( Index k = startZ; k < endZ; k++ )
-      for( Index j = startY; j < endY; j++ )
-      for( Index i = startX; i < endX; i++ )
-         f( i, j, k, args... );
+      ParallelFor3D< Devices::Sequential >::exec( startX, startY, startZ, endX, endY, endZ, f, args... );
 #endif
    }
 };
@@ -203,14 +235,14 @@ struct ParallelFor< Devices::Cuda, Mode >
       if( end > start ) {
          dim3 blockSize( 256 );
          dim3 gridSize;
-         gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
+         gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
 
          if( (std::size_t) blockSize.x * gridSize.x >= (std::size_t) end - start )
             ParallelForKernel< false ><<< gridSize, blockSize >>>( start, end, f, args... );
          else {
             // decrease the grid size and align to the number of multiprocessors
-            const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-            gridSize.x = TNL::min( desGridSize, Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
+            const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() );
+            gridSize.x = TNL::min( desGridSize, Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
             ParallelForKernel< true ><<< gridSize, blockSize >>>( start, end, f, args... );
          }
 
@@ -253,8 +285,8 @@ struct ParallelFor2D< Devices::Cuda, Mode >
             blockSize.y = TNL::min( 8, sizeY );
          }
          dim3 gridSize;
-         gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeX, blockSize.x ) );
-         gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) );
+         gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeX, blockSize.x ) );
+         gridSize.y = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeY, blockSize.y ) );
 
          dim3 gridCount;
          gridCount.x = roundUpDivision( sizeX, blockSize.x * gridSize.x );
@@ -337,9 +369,9 @@ struct ParallelFor3D< Devices::Cuda, Mode >
             blockSize.z = TNL::min( 4, sizeZ );
          }
          dim3 gridSize;
-         gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeX, blockSize.x ) );
-         gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) );
-         gridSize.z = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) );
+         gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeX, blockSize.x ) );
+         gridSize.y = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeY, blockSize.y ) );
+         gridSize.z = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) );
 
          dim3 gridCount;
          gridCount.x = roundUpDivision( sizeX, blockSize.x * gridSize.x );
@@ -383,4 +415,5 @@ struct ParallelFor3D< Devices::Cuda, Mode >
    }
 };
 
+} // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Algorithms/Reduction.h
similarity index 91%
rename from src/TNL/Containers/Algorithms/Reduction.h
rename to src/TNL/Algorithms/Reduction.h
index 83cedb01fcb5a93f4b225c92cc76f9500f72082d..c0d62684d57ccc0fa225f8ec0d56f92e8b0e904d 100644
--- a/src/TNL/Containers/Algorithms/Reduction.h
+++ b/src/TNL/Algorithms/Reduction.h
@@ -15,11 +15,11 @@
 #include <utility>  // std::pair
 #include <functional>  // reduction functions like std::plus, std::logical_and, std::logical_or etc.
 
+#include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 /**
@@ -37,6 +37,30 @@ namespace Algorithms {
 template< typename Device >
 struct Reduction;
 
+template<>
+struct Reduction< Devices::Sequential >
+{
+   template< typename Index,
+             typename Result,
+             typename ReductionOperation,
+             typename DataFetcher >
+   static constexpr Result
+   reduce( const Index size,
+           const ReductionOperation& reduction,
+           DataFetcher& dataFetcher,
+           const Result& zero );
+
+   template< typename Index,
+             typename Result,
+             typename ReductionOperation,
+             typename DataFetcher >
+   static constexpr std::pair< Index, Result >
+   reduceWithArgument( const Index size,
+                       const ReductionOperation& reduction,
+                       DataFetcher& dataFetcher,
+                       const Result& zero );
+};
+
 template<>
 struct Reduction< Devices::Host >
 {
@@ -236,7 +260,6 @@ struct Reduction< Devices::Cuda >
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
 
-#include <TNL/Containers/Algorithms/Reduction.hpp>
+#include <TNL/Algorithms/Reduction.hpp>
diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Algorithms/Reduction.hpp
similarity index 73%
rename from src/TNL/Containers/Algorithms/Reduction.hpp
rename to src/TNL/Algorithms/Reduction.hpp
index 229af13797f82a4f27f67bc81a5bfb6886a65604..b07f04445e7481679daa8e119f87adce16fe37df 100644
--- a/src/TNL/Containers/Algorithms/Reduction.hpp
+++ b/src/TNL/Algorithms/Reduction.hpp
@@ -16,9 +16,9 @@
 
 //#define CUDA_REDUCTION_PROFILING
 
-#include <TNL/Containers/Algorithms/Reduction.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/CudaReductionKernel.h>
+#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Algorithms/CudaReductionKernel.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 
 #ifdef CUDA_REDUCTION_PROFILING
 #include <iostream>
@@ -26,7 +26,6 @@
 #endif
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 /****
@@ -36,8 +35,115 @@ namespace Algorithms {
  */
 static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256;
 
-////
-// Reduction on host
+template< typename Index,
+          typename Result,
+          typename ReductionOperation,
+          typename DataFetcher >
+constexpr Result
+Reduction< Devices::Sequential >::
+reduce( const Index size,
+        const ReductionOperation& reduction,
+        DataFetcher& dataFetcher,
+        const Result& zero )
+{
+   constexpr int block_size = 128;
+   const int blocks = size / block_size;
+
+   if( blocks > 1 ) {
+      // initialize array for unrolled results
+      Result r[ 4 ] = { zero, zero, zero, zero };
+
+      // main reduction (explicitly unrolled loop)
+      for( int b = 0; b < blocks; b++ ) {
+         const Index offset = b * block_size;
+         for( int i = 0; i < block_size; i += 4 ) {
+            r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) );
+            r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) );
+            r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) );
+            r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) );
+         }
+      }
+
+      // reduction of the last, incomplete block (not unrolled)
+      for( Index i = blocks * block_size; i < size; i++ )
+         r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) );
+
+      // reduction of unrolled results
+      r[ 0 ] = reduction( r[ 0 ], r[ 2 ] );
+      r[ 1 ] = reduction( r[ 1 ], r[ 3 ] );
+      r[ 0 ] = reduction( r[ 0 ], r[ 1 ] );
+      return r[ 0 ];
+   }
+   else {
+      Result result = zero;
+      for( Index i = 0; i < size; i++ )
+         result = reduction( result, dataFetcher( i ) );
+      return result;
+   }
+}
+
+template< typename Index,
+          typename Result,
+          typename ReductionOperation,
+          typename DataFetcher >
+constexpr std::pair< Index, Result >
+Reduction< Devices::Sequential >::
+reduceWithArgument( const Index size,
+                    const ReductionOperation& reduction,
+                    DataFetcher& dataFetcher,
+                    const Result& zero )
+{
+   constexpr int block_size = 128;
+   const int blocks = size / block_size;
+
+   if( blocks > 1 ) {
+      // initialize array for unrolled results
+      Index arg[ 4 ] = { 0, 0, 0, 0 };
+      Result r[ 4 ] = { zero, zero, zero, zero };
+      bool initialized( false );
+
+      // main reduction (explicitly unrolled loop)
+      for( int b = 0; b < blocks; b++ ) {
+         const Index offset = b * block_size;
+         for( int i = 0; i < block_size; i += 4 ) {
+            if( ! initialized )
+            {
+               arg[ 0 ] = offset + i;
+               arg[ 1 ] = offset + i + 1;
+               arg[ 2 ] = offset + i + 2;
+               arg[ 3 ] = offset + i + 3;
+               r[ 0 ] = dataFetcher( offset + i );
+               r[ 1 ] = dataFetcher( offset + i + 1 );
+               r[ 2 ] = dataFetcher( offset + i + 2 );
+               r[ 3 ] = dataFetcher( offset + i + 3 );
+               initialized = true;
+               continue;
+            }
+            reduction( arg[ 0 ], offset + i,     r[ 0 ], dataFetcher( offset + i ) );
+            reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) );
+            reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) );
+            reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) );
+         }
+      }
+
+      // reduction of the last, incomplete block (not unrolled)
+      for( Index i = blocks * block_size; i < size; i++ )
+         reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) );
+
+      // reduction of unrolled results
+      reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] );
+      reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] );
+      reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] );
+      return std::make_pair( arg[ 0 ], r[ 0 ] );
+   }
+   else {
+      std::pair< Index, Result > result( 0, dataFetcher( 0 ) );
+      for( Index i = 1; i < size; i++ )
+         reduction( result.first, i, result.second, dataFetcher( i ) );
+      return result;
+   }
+}
+
 template< typename Index,
           typename Result,
           typename ReductionOperation,
@@ -49,10 +155,10 @@ reduce( const Index size,
         DataFetcher& dataFetcher,
         const Result& zero )
 {
+#ifdef HAVE_OPENMP
    constexpr int block_size = 128;
    const int blocks = size / block_size;
 
-#ifdef HAVE_OPENMP
    if( Devices::Host::isOMPEnabled() && blocks >= 2 ) {
       // global result variable
       Result result = zero;
@@ -93,42 +199,9 @@ reduce( const Index size,
       }
       return result;
    }
-   else {
-#endif
-      if( blocks > 1 ) {
-         // initialize array for unrolled results
-         Result r[ 4 ] = { zero, zero, zero, zero };
-
-         // main reduction (explicitly unrolled loop)
-         for( int b = 0; b < blocks; b++ ) {
-            const Index offset = b * block_size;
-            for( int i = 0; i < block_size; i += 4 ) {
-               r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) );
-               r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) );
-               r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) );
-               r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) );
-            }
-         }
-
-         // reduction of the last, incomplete block (not unrolled)
-         for( Index i = blocks * block_size; i < size; i++ )
-            r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) );
-
-         // reduction of unrolled results
-         r[ 0 ] = reduction( r[ 0 ], r[ 2 ] );
-         r[ 1 ] = reduction( r[ 1 ], r[ 3 ] );
-         r[ 0 ] = reduction( r[ 0 ], r[ 1 ] );
-         return r[ 0 ];
-      }
-      else {
-         Result result = zero;
-         for( Index i = 0; i < size; i++ )
-            result = reduction( result, dataFetcher( i ) );
-         return result;
-      }
-#ifdef HAVE_OPENMP
-   }
+   else
 #endif
+      return Reduction< Devices::Sequential >::reduce( size, reduction, dataFetcher, zero );
 }
 
 template< typename Index,
@@ -142,10 +215,10 @@ reduceWithArgument( const Index size,
                     DataFetcher& dataFetcher,
                     const Result& zero )
 {
+#ifdef HAVE_OPENMP
    constexpr int block_size = 128;
    const int blocks = size / block_size;
 
-#ifdef HAVE_OPENMP
    if( Devices::Host::isOMPEnabled() && blocks >= 2 ) {
       // global result variable
       std::pair< Index, Result > result( -1, zero );
@@ -202,57 +275,9 @@ reduceWithArgument( const Index size,
       }
       return result;
    }
-   else {
-#endif
-      if( blocks > 1 ) {
-         // initialize array for unrolled results
-         Index arg[ 4 ] = { 0, 0, 0, 0 };
-         Result r[ 4 ] = { zero, zero, zero, zero };
-         bool initialized( false );
-
-         // main reduction (explicitly unrolled loop)
-         for( int b = 0; b < blocks; b++ ) {
-            const Index offset = b * block_size;
-            for( int i = 0; i < block_size; i += 4 ) {
-               if( ! initialized )
-               {
-                  arg[ 0 ] = offset + i;
-                  arg[ 1 ] = offset + i + 1;
-                  arg[ 2 ] = offset + i + 2;
-                  arg[ 3 ] = offset + i + 3;
-                  r[ 0 ] = dataFetcher( offset + i );
-                  r[ 1 ] = dataFetcher( offset + i + 1 );
-                  r[ 2 ] = dataFetcher( offset + i + 2 );
-                  r[ 3 ] = dataFetcher( offset + i + 3 );
-                  initialized = true;
-                  continue;
-               }
-               reduction( arg[ 0 ], offset + i,     r[ 0 ], dataFetcher( offset + i ) );
-               reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) );
-               reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) );
-               reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) );
-            }
-         }
-
-         // reduction of the last, incomplete block (not unrolled)
-         for( Index i = blocks * block_size; i < size; i++ )
-            reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) );
-
-         // reduction of unrolled results
-         reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] );
-         reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] );
-         reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] );
-         return std::make_pair( arg[ 0 ], r[ 0 ] );
-      }
-      else {
-         std::pair< Index, Result > result( 0, dataFetcher( 0 ) );
-         for( Index i = 1; i < size; i++ )
-            reduction( result.first, i, result.second, dataFetcher( i ) );
-         return result;
-      }
-#ifdef HAVE_OPENMP
-   }
+   else
 #endif
+      return Reduction< Devices::Sequential >::reduceWithArgument( size, reduction, dataFetcher, zero );
 }
 
 template< typename Index,
@@ -310,7 +335,7 @@ reduce( const Index size,
          new Result[ reducedSize ]
          #endif
       };
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
+      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
 
       #ifdef CUDA_REDUCTION_PROFILING
          timer.stop();
@@ -321,7 +346,7 @@ reduce( const Index size,
 
       // finish the reduction on the host
       auto fetch = [&] ( Index i ) { return resultArray[ i ]; };
-      const Result result = Reduction< Devices::Host >::reduce( reducedSize, reduction, fetch, zero );
+      const Result result = Reduction< Devices::Sequential >::reduce( reducedSize, reduction, fetch, zero );
 
       #ifdef CUDA_REDUCTION_PROFILING
          timer.stop();
@@ -415,8 +440,8 @@ reduceWithArgument( const Index size,
          new Index[ reducedSize ]
          #endif
       };
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize );
+      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
+      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize );
 
       #ifdef CUDA_REDUCTION_PROFILING
          timer.stop();
@@ -427,7 +452,7 @@ reduceWithArgument( const Index size,
 
       // finish the reduction on the host
 //      auto fetch = [&] ( Index i ) { return resultArray[ i ]; };
-//      const Result result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero );
+//      const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero );
       for( Index i = 1; i < reducedSize; i++ )
          reduction( indexArray[ 0 ], indexArray[ i ], resultArray[ 0 ], resultArray[ i ] );
 
@@ -453,5 +478,4 @@ reduceWithArgument( const Index size,
 }
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/Scan.h b/src/TNL/Algorithms/Scan.h
similarity index 71%
rename from src/TNL/Containers/Algorithms/Scan.h
rename to src/TNL/Algorithms/Scan.h
index 5587c627307da329db40eef54b41ba3c2d4e994a..81a5d2f7e753b64391e134e93a0c5bb652e54310 100644
--- a/src/TNL/Containers/Algorithms/Scan.h
+++ b/src/TNL/Algorithms/Scan.h
@@ -12,17 +12,17 @@
 
 #pragma once
 
+#include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
 /**
  * \brief Scan (or prefix sum) type - inclusive or exclusive.
- * 
- * See \ref TNL::Containers::Algorithms::Scan
+ *
+ * See \ref TNL::Algorithms::Scan.
  */
 enum class ScanType {
    Exclusive,
@@ -31,22 +31,22 @@ enum class ScanType {
 
 /**
  * \brief Computes scan (or prefix sum) on a vector.
- * 
- * [Scan (or prefix sum)](https://en.wikipedia.org/wiki/Prefix_sum) operation turns a sequence 
+ *
+ * [Scan (or prefix sum)](https://en.wikipedia.org/wiki/Prefix_sum) operation turns a sequence
  * \f$a_1, \ldots, a_n\f$ into a sequence \f$s_1, \ldots, s_n\f$ defined as
- * 
+ *
  * \f[
  * s_i = \sum_{j=1}^i a_i.
  * \f]
  * Exclusive scan (or prefix sum) is defined as
- * 
+ *
  * \f[
  * \sigma_i = \sum_{j=1}^{i-1} a_i.
  * \f]
- * 
+ *
  * \tparam Device parameter says on what device the reduction is gonna be performed.
  * \tparam Type parameter says if inclusive or exclusive is scan is to be computed.
- * 
+ *
  * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >.
  */
 template< typename Device,
@@ -55,41 +55,41 @@ struct Scan;
 
 /**
  * \brief Computes segmented scan (or prefix sum) on a vector.
- * 
+ *
  * Segmented scan is a modification of common scan. In this case the sequence of
  * numbers in hand is divided into segments like this, for example
- * 
+ *
  * ```
  * [1,3,5][2,4,6,9][3,5],[3,6,9,12,15]
  * ```
- * 
+ *
  * and we want to compute inclusive or exclusive scan of each segment. For inclusive segmented prefix sum we get
- * 
+ *
  * ```
  * [1,4,9][2,6,12,21][3,8][3,9,18,30,45]
  * ```
- * 
+ *
  * and for exclusive segmented prefix sum it is
- * 
+ *
  * ```
  * [0,1,4][0,2,6,12][0,3][0,3,9,18,30]
  * ```
- * 
+ *
  * In addition to common scan, we need to encode the segments of the input sequence.
  * It is done by auxiliary flags array (it can be array of booleans) having `1` at the
  * beginning of each segment and `0` on all other positions. In our example, it would be like this:
- * 
+ *
  * ```
  * [1,0,0,1,0,0,0,1,0,1,0,0, 0, 0]
  * [1,3,5,2,4,6,9,3,5,3,6,9,12,15]
- * 
+ *
  * ```
- * 
+ *
  * \tparam Device parameter says on what device the reduction is gonna be performed.
  * \tparam Type parameter says if inclusive or exclusive is scan is to be computed.
- * 
+ *
  * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >.
- * 
+ *
  * **Note: Segmented scan is not implemented for CUDA yet.**
  */
 template< typename Device,
@@ -97,15 +97,75 @@ template< typename Device,
 struct SegmentedScan;
 
 
+template< ScanType Type >
+struct Scan< Devices::Sequential, Type >
+{
+   /**
+    * \brief Computes scan (prefix sum) sequentially.
+    *
+    * \tparam Vector the type of vector being used for the scan.
+    * \tparam Reduction lambda function defining the reduction operation
+    *
+    * \param v input vector, the result of scan is stored in the same vector
+    * \param begin the first element in the array to be scanned
+    * \param end the last element in the array to be scanned
+    * \param reduction lambda function implementing the reduction operation
+    * \param zero is the identity (neutral) element for the reduction operation, i.e. an element
+    *             which does not change the result of the reduction.
+    *
+    * The reduction lambda function takes two variables which are supposed to be reduced:
+    *
+    * ```
+    * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
+    * ```
+    *
+    * \par Example
+    *
+    * \include ReductionAndScan/ScanExample.cpp
+    *
+    * \par Output
+    *
+    * \include ScanExample.out
+    */
+   template< typename Vector,
+             typename Reduction >
+   static void
+   perform( Vector& v,
+            const typename Vector::IndexType begin,
+            const typename Vector::IndexType end,
+            const Reduction& reduction,
+            const typename Vector::RealType zero );
+
+   template< typename Vector,
+             typename Reduction >
+   static auto
+   performFirstPhase( Vector& v,
+                      const typename Vector::IndexType begin,
+                      const typename Vector::IndexType end,
+                      const Reduction& reduction,
+                      const typename Vector::RealType zero );
+
+   template< typename Vector,
+             typename BlockShifts,
+             typename Reduction >
+   static void
+   performSecondPhase( Vector& v,
+                       const BlockShifts& blockShifts,
+                       const typename Vector::IndexType begin,
+                       const typename Vector::IndexType end,
+                       const Reduction& reduction,
+                       const typename Vector::RealType shift );
+};
+
 template< ScanType Type >
 struct Scan< Devices::Host, Type >
 {
    /**
-    * \brief Computes scan (prefix sum) on CPU.
-    * 
+    * \brief Computes scan (prefix sum) using OpenMP.
+    *
     * \tparam Vector type vector being used for the scan.
     * \tparam Reduction lambda function defining the reduction operation
-    * 
+    *
     * \param v input vector, the result of scan is stored in the same vector
     * \param begin the first element in the array to be scanned
     * \param end the last element in the array to be scanned
@@ -162,10 +222,10 @@ struct Scan< Devices::Cuda, Type >
 {
    /**
     * \brief Computes scan (prefix sum) on GPU.
-    * 
+    *
     * \tparam Vector type vector being used for the scan.
     * \tparam Reduction lambda function defining the reduction operation
-    * 
+    *
     * \param v input vector, the result of scan is stored in the same vector
     * \param begin the first element in the array to be scanned
     * \param end the last element in the array to be scanned
@@ -217,16 +277,60 @@ struct Scan< Devices::Cuda, Type >
                        const typename Vector::RealType shift );
 };
 
+template< ScanType Type >
+struct SegmentedScan< Devices::Sequential, Type >
+{
+   /**
+    * \brief Computes segmented scan (prefix sum) sequentially.
+    *
+    * \tparam Vector the type of vector being used for the scan.
+    * \tparam Reduction lambda function defining the reduction operation
+    * \tparam Flags array type containing zeros and ones defining the segments beginning
+    *
+    * \param v input vector, the result of scan is stored in the same vector
+    * \param flags is an array with zeros and ones defining the segments beginning
+    * \param begin the first element in the array to be scanned
+    * \param end the last element in the array to be scanned
+    * \param reduction lambda function implementing the reduction operation
+    * \param zero is the identity (neutral) element for the reduction operation, i.e. an element
+    *             which does not change the result of the reduction.
+    *
+    * The reduction lambda function takes two variables which are supposed to be reduced:
+    *
+    * ```
+    * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
+    * ```
+    *
+    * \par Example
+    *
+    * \include ReductionAndScan/SegmentedScanExample.cpp
+    *
+    * \par Output
+    *
+    * \include SegmentedScanExample.out
+    */
+   template< typename Vector,
+             typename Reduction,
+             typename Flags >
+   static void
+   perform( Vector& v,
+            Flags& flags,
+            const typename Vector::IndexType begin,
+            const typename Vector::IndexType end,
+            const Reduction& reduction,
+            const typename Vector::RealType zero );
+};
+
 template< ScanType Type >
 struct SegmentedScan< Devices::Host, Type >
 {
    /**
-    * \brief Computes segmented scan (prefix sum) on CPU.
-    * 
+    * \brief Computes segmented scan (prefix sum) using OpenMP.
+    *
     * \tparam Vector type vector being used for the scan.
     * \tparam Reduction lambda function defining the reduction operation
     * \tparam Flags array type containing zeros and ones defining the segments begining
-    * 
+    *
     * \param v input vector, the result of scan is stored in the same vector
     * \param flags is an array with zeros and ones defining the segments begining
     * \param begin the first element in the array to be scanned
@@ -266,11 +370,11 @@ struct SegmentedScan< Devices::Cuda, Type >
 {
    /**
     * \brief Computes segmented scan (prefix sum) on GPU.
-    * 
+    *
     * \tparam Vector type vector being used for the scan.
     * \tparam Reduction lambda function defining the reduction operation
     * \tparam Flags array type containing zeros and ones defining the segments begining
-    * 
+    *
     * \param v input vector, the result of scan is stored in the same vector
     * \param flags is an array with zeros and ones defining the segments begining
     * \param begin the first element in the array to be scanned
@@ -292,7 +396,7 @@ struct SegmentedScan< Devices::Cuda, Type >
     * \par Output
     *
     * \include SegmentedScanExample.out
-    * 
+    *
     * **Note: Segmented scan is not implemented for CUDA yet.**
     */
    template< typename Vector,
@@ -308,7 +412,6 @@ struct SegmentedScan< Devices::Cuda, Type >
 };
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
 
-#include <TNL/Containers/Algorithms/Scan.hpp>
+#include <TNL/Algorithms/Scan.hpp>
diff --git a/src/TNL/Containers/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp
similarity index 77%
rename from src/TNL/Containers/Algorithms/Scan.hpp
rename to src/TNL/Algorithms/Scan.hpp
index d7d2b181150344b44fa4403c26c6c5f5ba3b1eb3..7b6d31ece513144c5b0cec9947a232b940fb5e30 100644
--- a/src/TNL/Containers/Algorithms/Scan.hpp
+++ b/src/TNL/Algorithms/Scan.hpp
@@ -17,14 +17,85 @@
 #include <TNL/Assert.h>
 #include <TNL/Containers/Array.h>
 #include <TNL/Containers/StaticArray.h>
-#include <TNL/Containers/Algorithms/CudaScanKernel.h>
+#include <TNL/Algorithms/CudaScanKernel.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Containers {
 namespace Algorithms {
 
+template< ScanType Type >
+   template< typename Vector,
+             typename Reduction >
+void
+Scan< Devices::Sequential, Type >::
+perform( Vector& v,
+         const typename Vector::IndexType begin,
+         const typename Vector::IndexType end,
+         const Reduction& reduction,
+         const typename Vector::RealType zero )
+{
+   // sequential prefix-sum does not need a second phase
+   performFirstPhase( v, begin, end, reduction, zero );
+}
+
+template< ScanType Type >
+   template< typename Vector,
+             typename Reduction >
+auto
+Scan< Devices::Sequential, Type >::
+performFirstPhase( Vector& v,
+                   const typename Vector::IndexType begin,
+                   const typename Vector::IndexType end,
+                   const Reduction& reduction,
+                   const typename Vector::RealType zero )
+{
+   using RealType = typename Vector::RealType;
+   using IndexType = typename Vector::IndexType;
+
+   // FIXME: StaticArray does not have getElement() which is used in DistributedScan
+//   return Containers::StaticArray< 1, RealType > block_sums;
+   Containers::Array< RealType, Devices::Host > block_sums( 1 );
+   block_sums[ 0 ] = zero;
+
+   if( Type == ScanType::Inclusive ) {
+      for( IndexType i = begin + 1; i < end; i++ )
+         v[ i ] = reduction( v[ i ], v[ i - 1 ] );
+      block_sums[ 0 ] = v[ end - 1 ];
+   }
+   else // Exclusive prefix sum
+   {
+      RealType aux = zero;
+      for( IndexType i = begin; i < end; i++ ) {
+         const RealType x = v[ i ];
+         v[ i ] = aux;
+         aux = reduction( aux, x );
+      }
+      block_sums[ 0 ] = aux;
+   }
+
+   return block_sums;
+}
+
+template< ScanType Type >
+   template< typename Vector,
+             typename BlockShifts,
+             typename Reduction >
+void
+Scan< Devices::Sequential, Type >::
+performSecondPhase( Vector& v,
+                    const BlockShifts& blockShifts,
+                    const typename Vector::IndexType begin,
+                    const typename Vector::IndexType end,
+                    const Reduction& reduction,
+                    const typename Vector::RealType shift )
+{
+   using IndexType = typename Vector::IndexType;
+
+   for( IndexType i = begin; i < end; i++ )
+      v[ i ] = reduction( v[ i ], shift );
+}
+
 template< ScanType Type >
    template< typename Vector,
              typename Reduction >
@@ -40,8 +111,7 @@ perform( Vector& v,
    const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero );
    performSecondPhase( v, blockShifts, begin, end, reduction, zero );
 #else
-   // sequential prefix-sum does not need a second phase
-   performFirstPhase( v, begin, end, reduction, zero );
+   Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero );
 #endif
 }
 
@@ -56,12 +126,12 @@ performFirstPhase( Vector& v,
                    const Reduction& reduction,
                    const typename Vector::RealType zero )
 {
+#ifdef HAVE_OPENMP
    using RealType = typename Vector::RealType;
    using IndexType = typename Vector::IndexType;
 
-#ifdef HAVE_OPENMP
    const int threads = Devices::Host::getMaxThreadsCount();
-   Array< RealType, Devices::Host > block_sums( threads + 1 );
+   Containers::Array< RealType > block_sums( threads + 1 );
    block_sums[ 0 ] = zero;
 
    #pragma omp parallel num_threads(threads)
@@ -99,28 +169,7 @@ performFirstPhase( Vector& v,
    // block_sums now contains shift values for each block - to be used in the second phase
    return block_sums;
 #else
-   // FIXME: StaticArray does not have getElement() which is used in DistributedScan
-//   return StaticArray< 1, RealType > block_sums;
-   Array< RealType, Devices::Host > block_sums( 1 );
-   block_sums[ 0 ] = zero;
-
-   if( Type == ScanType::Inclusive ) {
-      for( IndexType i = begin + 1; i < end; i++ )
-         v[ i ] = reduction( v[ i ], v[ i - 1 ] );
-      block_sums[ 0 ] = v[ end - 1 ];
-   }
-   else // Exclusive prefix sum
-   {
-      RealType aux = zero;
-      for( IndexType i = begin; i < end; i++ ) {
-         const RealType x = v[ i ];
-         v[ i ] = aux;
-         aux = reduction( aux, x );
-      }
-      block_sums[ 0 ] = aux;
-   }
-
-   return block_sums;
+   return Scan< Devices::Sequential, Type >::performFirstPhase( v, begin, end, reduction, zero );
 #endif
 }
 
@@ -137,10 +186,10 @@ performSecondPhase( Vector& v,
                     const Reduction& reduction,
                     const typename Vector::RealType shift )
 {
+#ifdef HAVE_OPENMP
    using RealType = typename Vector::RealType;
    using IndexType = typename Vector::IndexType;
 
-#ifdef HAVE_OPENMP
    const int threads = blockShifts.getSize() - 1;
 
    // launch exactly the same number of threads as in the first phase
@@ -155,8 +204,7 @@ performSecondPhase( Vector& v,
          v[ i ] = reduction( v[ i ], offset );
    }
 #else
-   for( IndexType i = begin; i < end; i++ )
-      v[ i ] = reduction( v[ i ], shift );
+   Scan< Devices::Sequential, Type >::performSecondPhase( v, blockShifts, begin, end, reduction, shift );
 #endif
 }
 
@@ -246,7 +294,7 @@ template< ScanType Type >
              typename Reduction,
              typename Flags >
 void
-SegmentedScan< Devices::Host, Type >::
+SegmentedScan< Devices::Sequential, Type >::
 perform( Vector& v,
          Flags& flags,
          const typename Vector::IndexType begin,
@@ -257,7 +305,6 @@ perform( Vector& v,
    using RealType = typename Vector::RealType;
    using IndexType = typename Vector::IndexType;
 
-   // TODO: parallelize with OpenMP
    if( Type == ScanType::Inclusive )
    {
       for( IndexType i = begin + 1; i < end; i++ )
@@ -279,6 +326,27 @@ perform( Vector& v,
    }
 }
 
+template< ScanType Type >
+   template< typename Vector,
+             typename Reduction,
+             typename Flags >
+void
+SegmentedScan< Devices::Host, Type >::
+perform( Vector& v,
+         Flags& flags,
+         const typename Vector::IndexType begin,
+         const typename Vector::IndexType end,
+         const Reduction& reduction,
+         const typename Vector::RealType zero )
+{
+#ifdef HAVE_OPENMP
+   // TODO: parallelize with OpenMP
+   SegmentedScan< Devices::Sequential, Type >::perform( v, flags, begin, end, reduction, zero );
+#else
+   SegmentedScan< Devices::Sequential, Type >::perform( v, flags, begin, end, reduction, zero );
+#endif
+}
+
 template< ScanType Type >
    template< typename Vector,
              typename Reduction,
@@ -296,12 +364,11 @@ perform( Vector& v,
    using RealType = typename Vector::RealType;
    using IndexType = typename Vector::IndexType;
 
-   throw Exceptions::NotImplementedError( "Segmented prefix sum is not implemented for CUDA." );
+   throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) is not implemented for CUDA." );
 #else
    throw Exceptions::CudaSupportMissing();
 #endif
 }
 
 } // namespace Algorithms
-} // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/StaticFor.h b/src/TNL/Algorithms/StaticFor.h
similarity index 95%
rename from src/TNL/StaticFor.h
rename to src/TNL/Algorithms/StaticFor.h
index 990036dfc0090708851468e03e991a30a07cc835..c7404545840143bd053ed371c5813a7a0feaa185 100644
--- a/src/TNL/StaticFor.h
+++ b/src/TNL/Algorithms/StaticFor.h
@@ -10,9 +10,10 @@
 
 #pragma once
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
+namespace Algorithms {
 
 // Manual unrolling does not make sense for loops with a large iterations
 // count. For a very large iterations count it would trigger the compiler's
@@ -57,4 +58,5 @@ struct StaticFor< Begin, End, false >
    }
 };
 
+} // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/StaticVectorFor.h b/src/TNL/Algorithms/StaticVectorFor.h
similarity index 97%
rename from src/TNL/StaticVectorFor.h
rename to src/TNL/Algorithms/StaticVectorFor.h
index 59af0fcb8256619d41014be4fa21023fee4679e2..664f97aed95651249447788d62a6f19be8855bd6 100644
--- a/src/TNL/StaticVectorFor.h
+++ b/src/TNL/Algorithms/StaticVectorFor.h
@@ -13,6 +13,7 @@
 #include <TNL/Containers/StaticVector.h>
 
 namespace TNL {
+namespace Algorithms {
 
 struct StaticVectorFor
 {
@@ -48,4 +49,5 @@ struct StaticVectorFor
    }
 };
 
+} // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/TemplateStaticFor.h b/src/TNL/Algorithms/TemplateStaticFor.h
similarity index 97%
rename from src/TNL/TemplateStaticFor.h
rename to src/TNL/Algorithms/TemplateStaticFor.h
index 88ad764fd9b78d0348469a115ee1cb83ecb7993b..753ad9b2618b2704292517e9b74ffff7192d22b7 100644
--- a/src/TNL/TemplateStaticFor.h
+++ b/src/TNL/Algorithms/TemplateStaticFor.h
@@ -13,9 +13,10 @@
 #include <utility>
 #include <type_traits>
 
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
+namespace Algorithms {
 namespace detail {
 
 template< typename IndexType,
@@ -89,4 +90,5 @@ struct TemplateStaticFor
    }
 };
 
+} // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/Allocators/Cuda.h b/src/TNL/Allocators/Cuda.h
index 74ebb840432136d9033a17a86684607098a80d86..1b648f1ce3818978e086f26e64128536f40a8806 100644
--- a/src/TNL/Allocators/Cuda.h
+++ b/src/TNL/Allocators/Cuda.h
@@ -12,7 +12,9 @@
 
 #pragma once
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Cuda/CheckDevice.h>
 
 namespace TNL {
 namespace Allocators {
diff --git a/src/TNL/Allocators/CudaHost.h b/src/TNL/Allocators/CudaHost.h
index 284c91fe9b8dbc7abe8e3d4685ef1d7551d19a89..9047e0b9af632b9f6fd466352d2cd3659f67210a 100644
--- a/src/TNL/Allocators/CudaHost.h
+++ b/src/TNL/Allocators/CudaHost.h
@@ -12,7 +12,9 @@
 
 #pragma once
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Cuda/CheckDevice.h>
 
 namespace TNL {
 namespace Allocators {
diff --git a/src/TNL/Allocators/CudaManaged.h b/src/TNL/Allocators/CudaManaged.h
index db29f86cb618bf79e4f1c0fa0ac1ad2750d476bc..bb878ca66bef97491c6db407128c7c3322fdce7a 100644
--- a/src/TNL/Allocators/CudaManaged.h
+++ b/src/TNL/Allocators/CudaManaged.h
@@ -12,7 +12,9 @@
 
 #pragma once
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Cuda/CheckDevice.h>
 
 namespace TNL {
 namespace Allocators {
diff --git a/src/TNL/Allocators/Default.h b/src/TNL/Allocators/Default.h
index 6906a905c3a82d3e2400c4ba6a767848bf1be061..109539d0c92def3628d288b97b7ea82681b5df8b 100644
--- a/src/TNL/Allocators/Default.h
+++ b/src/TNL/Allocators/Default.h
@@ -14,10 +14,9 @@
 
 #include <TNL/Allocators/Host.h>
 #include <TNL/Allocators/Cuda.h>
-#include <TNL/Allocators/MIC.h>
+#include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
 
 namespace TNL {
 namespace Allocators {
@@ -29,28 +28,28 @@ namespace Allocators {
 template< typename Device >
 struct Default;
 
-//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Host.
+//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Sequential.
 template<>
-struct Default< Devices::Host >
+struct Default< Devices::Sequential >
 {
    template< typename T >
    using Allocator = Allocators::Host< T >;
 };
 
-//! Sets \ref Allocators::Cuda as the default allocator for \ref Devices::Cuda.
+//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Host.
 template<>
-struct Default< Devices::Cuda >
+struct Default< Devices::Host >
 {
    template< typename T >
-   using Allocator = Allocators::Cuda< T >;
+   using Allocator = Allocators::Host< T >;
 };
 
-//! Sets \ref Allocators::MIC as the default allocator for \ref Devices::MIC.
+//! Sets \ref Allocators::Cuda as the default allocator for \ref Devices::Cuda.
 template<>
-struct Default< Devices::MIC >
+struct Default< Devices::Cuda >
 {
    template< typename T >
-   using Allocator = Allocators::MIC< T >;
+   using Allocator = Allocators::Cuda< T >;
 };
 
 } // namespace Allocators
diff --git a/src/TNL/Allocators/MIC.h b/src/TNL/Allocators/MIC.h
deleted file mode 100644
index c3599f449cd85f9f83c0ef0e5974bb015d04a6ef..0000000000000000000000000000000000000000
--- a/src/TNL/Allocators/MIC.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/***************************************************************************
-                          MIC.h  -  description
-                             -------------------
-    begin                : Jul 2, 2019
-    copyright            : (C) 2019 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <TNL/Devices/MIC.h>
-#include <TNL/Exceptions/MICSupportMissing.h>
-
-namespace TNL {
-namespace Allocators {
-
-/**
- * \brief Allocator for the MIC device memory space.
- */
-template< class T >
-struct MIC
-{
-   using value_type = T;
-   using size_type = std::size_t;
-   using difference_type = std::ptrdiff_t;
-
-   MIC() = default;
-   MIC( const MIC& ) = default;
-   MIC( MIC&& ) = default;
-
-   MIC& operator=( const MIC& ) = default;
-   MIC& operator=( MIC&& ) = default;
-
-   template< class U >
-   MIC( const MIC< U >& )
-   {}
-
-   template< class U >
-   MIC( MIC< U >&& )
-   {}
-
-   template< class U >
-   MIC& operator=( const MIC< U >& )
-   {
-      return *this;
-   }
-
-   template< class U >
-   MIC& operator=( MIC< U >&& )
-   {
-      return *this;
-   }
-
-   value_type* allocate( size_type size )
-   {
-#ifdef HAVE_MIC
-      Devices::MICHider<void> hide_ptr;
-      #pragma offload target(mic) out(hide_ptr) in(size)
-      {
-         hide_ptr.pointer = malloc(size * sizeof(value_type));
-      }
-      return hide_ptr.pointer;
-#else
-      throw Exceptions::MICSupportMissing();
-#endif
-   }
-
-   void deallocate(value_type* ptr, size_type)
-   {
-#ifdef HAVE_MIC
-      Devices::MICHider<void> hide_ptr;
-      hide_ptr.pointer=ptr;
-      #pragma offload target(mic) in(hide_ptr)
-      {
-         free(hide_ptr.pointer);
-      }
-#else
-      throw Exceptions::MICSupportMissing();
-#endif
-   }
-};
-
-template<class T1, class T2>
-bool operator==(const MIC<T1>&, const MIC<T2>&)
-{
-   return true;
-}
-
-template<class T1, class T2>
-bool operator!=(const MIC<T1>& lhs, const MIC<T2>& rhs)
-{
-   return !(lhs == rhs);
-}
-
-} // namespace Allocators
-} // namespace TNL
diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h
index 27f3b11b28ed46a9741c3593c573a243f1e0a81d..df862956219ac03e1d1e1fa27e1c67a0e1035ad5 100644
--- a/src/TNL/Assert.h
+++ b/src/TNL/Assert.h
@@ -38,7 +38,7 @@
    #define TNL_NVCC_HD_WARNING_DISABLE
 #endif
 
-#if defined(NDEBUG) || defined(HAVE_MIC)
+#ifdef NDEBUG
 
 // empty macros for optimized build
 /**
@@ -120,7 +120,7 @@
 #include <iostream>
 #include <stdio.h>
 
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Debugging/StackBacktrace.h>
 
 namespace TNL {
diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h
index ca36f9676e34a76f8427c39bbb190954e701ea5d..e84236287cb28f7cfa0823154201475f53c43be8 100644
--- a/src/TNL/Atomic.h
+++ b/src/TNL/Atomic.h
@@ -16,13 +16,37 @@
 
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/param-types.h>
+
+// double-precision atomicAdd function for Maxwell and older GPUs
+// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
+#ifdef HAVE_CUDA
+#if __CUDA_ARCH__ < 600
+namespace {
+   __device__ double atomicAdd(double* address, double val)
+   {
+       unsigned long long int* address_as_ull =
+                                 (unsigned long long int*)address;
+       unsigned long long int old = *address_as_ull, assumed;
+
+       do {
+           assumed = old;
+           old = atomicCAS(address_as_ull, assumed,
+                           __double_as_longlong(val +
+                                  __longlong_as_double(assumed)));
+
+       // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+       } while (assumed != old);
+
+       return __longlong_as_double(old);
+   }
+} // namespace
+#endif
+#endif
 
 namespace TNL {
 
 template< typename T, typename Device >
-class Atomic
-{};
+class Atomic;
 
 template< typename T >
 class Atomic< T, Devices::Host >
@@ -48,14 +72,6 @@ public:
       return *this;
    }
 
-   // just for compatibility with TNL::Containers::Array...
-   static String getType()
-   {
-      return "Atomic< " +
-             TNL::getType< T >() + ", " +
-             Devices::Host::getDeviceType() + " >";
-   }
-
    // CAS loops for updating maximum and minimum
    // reference: https://stackoverflow.com/a/16190791
    T fetch_max( T value ) noexcept
@@ -120,14 +136,6 @@ public:
       return *this;
    }
 
-   // just for compatibility with TNL::Containers::Array...
-   static String getType()
-   {
-      return "Atomic< " +
-             TNL::getType< T >() + ", " +
-             Devices::Cuda::getDeviceType() + " >";
-   }
-
    bool is_lock_free() const noexcept
    {
       return true;
diff --git a/src/TNL/Communicators/MpiCommunicator.h b/src/TNL/Communicators/MpiCommunicator.h
index 926fa329a6e88b3d84406f464aa3e82057e3ef24..0aa14a9ece5d518ce5e142898cea56593d375189 100644
--- a/src/TNL/Communicators/MpiCommunicator.h
+++ b/src/TNL/Communicators/MpiCommunicator.h
@@ -24,7 +24,7 @@
 #include <unistd.h>  // getpid
 
 #ifdef HAVE_CUDA
-    #include <TNL/Devices/Cuda.h>
+    #include <TNL/Cuda/CheckDevice.h>
 
     typedef struct __attribute__((__packed__))  {
        char name[MPI_MAX_PROCESSOR_NAME];
diff --git a/src/TNL/Config/ConfigDescription.h b/src/TNL/Config/ConfigDescription.h
index dc32c16840d1b0d8f1e7d479942de1e32f01fa19..648db1d445de1ef7362e0265927226e4ec734887 100644
--- a/src/TNL/Config/ConfigDescription.h
+++ b/src/TNL/Config/ConfigDescription.h
@@ -14,11 +14,9 @@
 #include <string>
 #include <vector>
 #include <memory>
-#include "make_unique.h"
 
 #include <TNL/Assert.h>
 #include <TNL/String.h>
-#include <TNL/param-types.h>
 #include <TNL/Config/ConfigEntryType.h>
 #include <TNL/Config/ConfigEntry.h>
 #include <TNL/Config/ConfigEntryList.h>
@@ -144,7 +142,7 @@ public:
       TNL_ASSERT_TRUE( this->currentEntry, "there is no current entry" );
       if( isCurrentEntryList ) {
          ConfigEntryList< EntryType >& entry = dynamic_cast< ConfigEntryList< EntryType >& >( *currentEntry );
-         entry.getEnumValues().push_back( entryEnum );         
+         entry.getEnumValues().push_back( entryEnum );
       }
       else {
          ConfigEntry< EntryType >& entry = dynamic_cast< ConfigEntry< EntryType >& >( *currentEntry );
@@ -218,7 +216,7 @@ public:
       std::cerr << "Asking for the default value of unknown parameter." << std::endl;
       return nullptr;
    }
- 
+
    //! Returns zero pointer if there is no default value
    template< class T >
    T* getDefaultValue( const String& name )
@@ -256,55 +254,59 @@ public:
          if( entries[ i ]->hasDefaultValue &&
              ! parameter_container.checkParameter( entry_name ) )
          {
-            if( entries[ i ]->getEntryType() == "String" )
+            if( entries[ i ]->getEntryType() == "TNL::String" )
             {
                ConfigEntry< String >& entry = dynamic_cast< ConfigEntry< String >& >( *entries[ i ] );
                parameter_container.addParameter< String >( entry_name, entry.defaultValue );
                continue;
             }
-            if( entries[ i ]->getEntryType() == "bool" )
+            else if( entries[ i ]->getEntryType() == "bool" )
             {
                ConfigEntry< bool >& entry = dynamic_cast< ConfigEntry< bool >& >( *entries[ i ] );
                parameter_container.addParameter< bool >( entry_name, entry.defaultValue );
                continue;
             }
-            if( entries[ i ]->getEntryType() == "int" )
+            else if( entries[ i ]->getEntryType() == "int" )
             {
                ConfigEntry< int >& entry = dynamic_cast< ConfigEntry< int >& >( *entries[ i ] );
                parameter_container.addParameter< int >( entry_name, entry.defaultValue );
                continue;
             }
-            if( entries[ i ]->getEntryType() == "double" )
+            else if( entries[ i ]->getEntryType() == "double" )
             {
                ConfigEntry< double >& entry = dynamic_cast< ConfigEntry< double >& >( *entries[ i ] );
                parameter_container.addParameter< double >( entry_name, entry.defaultValue );
                continue;
             }
-            
-            if( entries[ i ]->getEntryType() == "ConfigEntryList< String >" )
+            else if( entries[ i ]->getEntryType() == "ConfigEntryList< TNL::String >" )
             {
                ConfigEntryList< String >& entry = dynamic_cast< ConfigEntryList< String >& >( *entries[ i ] );
                parameter_container.addList< String >( entry_name, entry.defaultValue );
                continue;
             }
-            if( entries[ i ]->getEntryType() == "ConfigEntryList< bool >" )
+            else if( entries[ i ]->getEntryType() == "ConfigEntryList< bool >" )
             {
                ConfigEntryList< bool >& entry = dynamic_cast< ConfigEntryList< bool >& >( *entries[ i ] );
                parameter_container.addList< bool >( entry_name, entry.defaultValue );
                continue;
             }
-            if( entries[ i ]->getEntryType() == "ConfigEntryList< int >" )
+            else if( entries[ i ]->getEntryType() == "ConfigEntryList< int >" )
             {
                ConfigEntryList< int >& entry = dynamic_cast< ConfigEntryList< int >& >( *entries[ i ] );
                parameter_container.addList< int >( entry_name, entry.defaultValue );
                continue;
             }
-            if( entries[ i ]->getEntryType() == "ConfigEntryList< double >" )
+            else if( entries[ i ]->getEntryType() == "ConfigEntryList< double >" )
             {
                ConfigEntryList< double >& entry = dynamic_cast< ConfigEntryList< double >& >( *entries[ i ] );
                parameter_container.addList< double >( entry_name, entry.defaultValue );
                continue;
             }
+            else
+            {
+               throw std::runtime_error( "Method ConfigDescription::addMissingEntries encountered "
+                                         "unsupported entry type: " + entries[ i ]->getEntryType() );
+            }
          }
       }
    }
diff --git a/src/TNL/Config/ConfigEntry.h b/src/TNL/Config/ConfigEntry.h
index 1b56574cc3983a3425ab023e7466c699fbe9f982..370366e5ea3c7e906f948417d66c32de41b01aea 100644
--- a/src/TNL/Config/ConfigEntry.h
+++ b/src/TNL/Config/ConfigEntry.h
@@ -12,6 +12,7 @@
 
 #include <vector>
 
+#include <TNL/TypeInfo.h>
 #include <TNL/Config/ConfigEntryBase.h>
 
 namespace TNL {
diff --git a/src/TNL/Config/ConfigEntryList.h b/src/TNL/Config/ConfigEntryList.h
index 50284e37c1195916ab5c2ccfd3f72dd4d6ed7ed8..86f2642349ad470f2a8fd268ad117b1d3baf268a 100644
--- a/src/TNL/Config/ConfigEntryList.h
+++ b/src/TNL/Config/ConfigEntryList.h
@@ -12,6 +12,7 @@
 
 #include <vector>
 
+#include <TNL/TypeInfo.h>
 #include <TNL/Config/ConfigEntryBase.h>
 
 namespace TNL {
diff --git a/src/TNL/Config/ParameterContainer.h b/src/TNL/Config/ParameterContainer.h
index b298234d836188e316d93b87bcb0bf69e09afb1d..734db27f512a11124ef512b55ce578301d4c790c 100644
--- a/src/TNL/Config/ParameterContainer.h
+++ b/src/TNL/Config/ParameterContainer.h
@@ -12,9 +12,9 @@
 
 #include <vector>
 #include <memory>
-#include "make_unique.h"
 
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
+#include <TNL/String.h>
 //#include <TNL/Debugging/StackBacktrace.h>
 
 namespace TNL {
@@ -63,7 +63,7 @@ public:
       parameters.push_back( std::make_unique< Parameter< T > >( name, TNL::getType< T >(), value ) );
       return true;
    }
-   
+
    /**
     * \brief Adds new parameter to the ParameterContainer.
     *
@@ -73,7 +73,7 @@ public:
     */
    template< class T >
    bool addList( const String& name,
-                      const T& value )
+                 const T& value )
    {
       std::vector< T > v;
       v.push_back( value );
diff --git a/src/TNL/Config/make_unique.h b/src/TNL/Config/make_unique.h
deleted file mode 100644
index 4a4078a028e2c1cb9feec357080d20bc3f05454b..0000000000000000000000000000000000000000
--- a/src/TNL/Config/make_unique.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-// std::make_unique does not exist until C++14
-// https://stackoverflow.com/a/9657991
-#if __cplusplus < 201402L
-#include <memory>
-
-namespace std {
-   template<typename T, typename ...Args>
-   std::unique_ptr<T> make_unique( Args&& ...args )
-   {
-      return std::unique_ptr<T>( new T( std::forward<Args>(args)... ) );
-   }
-}
-#endif
diff --git a/src/TNL/Config/parseCommandLine.h b/src/TNL/Config/parseCommandLine.h
index 34a555f2890ee6995e6ea464c00410f83ce1ccf3..8993de027b2d4149112a4d74098478d3bdee3268 100644
--- a/src/TNL/Config/parseCommandLine.h
+++ b/src/TNL/Config/parseCommandLine.h
@@ -13,7 +13,6 @@
 #include <cstring>
 #include <string>
 
-//#include <TNL/Object.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
 
@@ -51,7 +50,7 @@ parseCommandLine( int argc, char* argv[],
 
    int i;
    bool parse_error( false );
-   for( i = 1; i < argc; i ++ )
+   for( i = 1; i < argc; i++ )
    {
       const char* _option = argv[ i ];
       if( _option[ 0 ] != '-' )
@@ -75,7 +74,7 @@ parseCommandLine( int argc, char* argv[],
       else
       {
          const String& entryType = entry->getEntryType();
-         const char* value = argv[ ++ i ];
+         const char* value = argv[ ++i ];
          if( ! value )
          {
             std::cerr << "Missing value for the parameter " << option << "." << std::endl;
@@ -97,11 +96,11 @@ parseCommandLine( int argc, char* argv[],
             while( i < argc && ( ( argv[ i ] )[ 0 ] != '-' || ( atof( argv[ i ] ) < 0.0 && ( parsedEntryType[ 1 ] == "int" || parsedEntryType[ 1 ] == "double" ) ) ) )
             {
                const char* value = argv[ i ++ ];
-               if( parsedEntryType[ 1 ] == "String" )
+               if( parsedEntryType[ 1 ] == "TNL::String" )
                {
                   string_list.push_back( String( value ) );
                }
-               if( parsedEntryType[ 1 ] == "bool" )
+               else if( parsedEntryType[ 1 ] == "bool" )
                {
                   const int v = matob( value );
                   if( v == -1 )
@@ -111,14 +110,19 @@ parseCommandLine( int argc, char* argv[],
                   }
                   else bool_list.push_back( v );
                }
-               if( parsedEntryType[ 1 ] == "int" )
+               else if( parsedEntryType[ 1 ] == "int" )
                {
                   integer_list.push_back( atoi( value ) );
                }
-               if( parsedEntryType[ 1 ] == "double" )
+               else if( parsedEntryType[ 1 ] == "double" )
                {
                   real_list.push_back( atof( value ) );
                }
+               else
+               {
+                  // this will not happen if all entry types are handled above
+                  throw std::runtime_error( "Function parseCommandLine encountered unsupported entry type: " + entryType );
+               }
             }
             if( string_list.size() )
                parameters.addParameter< std::vector< String > >( option, string_list );
@@ -133,14 +137,14 @@ parseCommandLine( int argc, char* argv[],
          }
          else
          {
-            if( parsedEntryType[ 0 ] == "String" )
+            if( parsedEntryType[ 0 ] == "TNL::String" )
             {
                if( ! ( ( ConfigEntry< String >* ) entry )->checkValue( value ) )
                   return false;
                 parameters.addParameter< String >( option, value );
                 continue;
             }
-            if( parsedEntryType[ 0 ] == "bool" )
+            else if( parsedEntryType[ 0 ] == "bool" )
             {
                const int v = matob( value );
                if( v == -1 )
@@ -151,7 +155,7 @@ parseCommandLine( int argc, char* argv[],
                else parameters.addParameter< bool >( option, v );
                continue;
             }
-            if( parsedEntryType[ 0 ] == "int" )
+            else if( parsedEntryType[ 0 ] == "int" )
             {
                /*if( ! std::isdigit( value ) ) //TODO: Check for real number
                {
@@ -163,7 +167,7 @@ parseCommandLine( int argc, char* argv[],
                   return false;
                parameters.addParameter< int >( option, atoi( value ) );
             }
-            if( parsedEntryType[ 0 ] == "double" )
+            else if( parsedEntryType[ 0 ] == "double" )
             {
                /*if( ! std::isdigit( value ) )  //TODO: Check for real number
                {
@@ -175,6 +179,11 @@ parseCommandLine( int argc, char* argv[],
                   return false;
                parameters.addParameter< double >( option, atof( value ) );
             }
+            else
+            {
+               // this will not happen if all entry types are handled above
+               throw std::runtime_error( "Function parseCommandLine encountered unsupported entry type: " + entryType );
+            }
          }
       }
    }
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
deleted file mode 100644
index b81fd7f2b7e5f0c11211ef8263da89d00cf243cf..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
+++ /dev/null
@@ -1,333 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsCuda.hpp  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <iostream>
-#include <memory>
-#include <stdexcept>
-
-#include <TNL/Math.h>
-#include <TNL/ParallelFor.h>
-#include <TNL/Exceptions/CudaSupportMissing.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-template< typename Element >
-void
-ArrayOperations< Devices::Cuda >::
-setElement( Element* data,
-            const Element& value )
-{
-   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
-   ArrayOperations< Devices::Cuda >::set( data, value, 1 );
-}
-
-template< typename Element >
-Element
-ArrayOperations< Devices::Cuda >::
-getElement( const Element* data )
-{
-   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
-   Element result;
-   ArrayOperations< Devices::Host, Devices::Cuda >::copy< Element, Element, int >( &result, data, 1 );
-   return result;
-}
-
-template< typename Element, typename Index >
-void
-ArrayOperations< Devices::Cuda >::
-set( Element* data,
-     const Element& value,
-     const Index size )
-{
-   if( size == 0 ) return;
-   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
-   auto kernel = [data, value] __cuda_callable__ ( Index i )
-   {
-      data[ i ] = value;
-   };
-   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
-}
-
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-void
-ArrayOperations< Devices::Cuda >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   if( size == 0 ) return;
-   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
-   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
-   if( std::is_same< DestinationElement, SourceElement >::value )
-   {
-#ifdef HAVE_CUDA
-      cudaMemcpy( destination,
-                  source,
-                  size * sizeof( DestinationElement ),
-                  cudaMemcpyDeviceToDevice );
-      TNL_CHECK_CUDA_DEVICE;
-#else
-      throw Exceptions::CudaSupportMissing();
-#endif
-   }
-   else
-   {
-      auto kernel = [destination, source] __cuda_callable__ ( Index i )
-      {
-         destination[ i ] = source[ i ];
-      };
-      ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
-   }
-}
-
-template< typename DestinationElement,
-          typename Index,
-          typename SourceIterator >
-void
-ArrayOperations< Devices::Cuda >::
-copyFromIterator( DestinationElement* destination,
-                  Index destinationSize,
-                  SourceIterator first,
-                  SourceIterator last )
-{
-   using BaseType = typename std::remove_cv< DestinationElement >::type;
-   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] };
-   Index copiedElements = 0;
-   while( copiedElements < destinationSize && first != last ) {
-      Index i = 0;
-      while( i < Devices::Cuda::getGPUTransferBufferSize() && first != last )
-         buffer[ i++ ] = *first++;
-      ArrayOperations< Devices::Cuda, Devices::Host >::copy( &destination[ copiedElements ], buffer.get(), i );
-      copiedElements += i;
-   }
-   if( first != last )
-      throw std::length_error( "Source iterator is larger than the destination array." );
-}
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-bool
-ArrayOperations< Devices::Cuda >::
-compare( const Element1* destination,
-         const Element2* source,
-         const Index size )
-{
-   if( size == 0 ) return true;
-   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
-   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
-
-   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
-   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
-}
-
-template< typename Element,
-          typename Index >
-bool
-ArrayOperations< Devices::Cuda >::
-containsValue( const Element* data,
-               const Index size,
-               const Element& value )
-{
-   if( size == 0 ) return false;
-   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
-   TNL_ASSERT_GE( size, (Index) 0, "" );
-
-   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
-   return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false );
-}
-
-template< typename Element,
-          typename Index >
-bool
-ArrayOperations< Devices::Cuda >::
-containsOnlyValue( const Element* data,
-                   const Index size,
-                   const Element& value )
-{
-   if( size == 0 ) return false;
-   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
-   TNL_ASSERT_GE( size, 0, "" );
-
-   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
-   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
-}
-
-
-/****
- * Operations CUDA -> Host
- */
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-void
-ArrayOperations< Devices::Host, Devices::Cuda >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   if( size == 0 ) return;
-   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
-   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
-#ifdef HAVE_CUDA
-   if( std::is_same< DestinationElement, SourceElement >::value )
-   {
-      if( cudaMemcpy( destination,
-                      source,
-                      size * sizeof( DestinationElement ),
-                      cudaMemcpyDeviceToHost ) != cudaSuccess )
-         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-      TNL_CHECK_CUDA_DEVICE;
-   }
-   else
-   {
-      using BaseType = typename std::remove_cv< SourceElement >::type;
-      std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] };
-      Index i( 0 );
-      while( i < size )
-      {
-         if( cudaMemcpy( (void*) buffer.get(),
-                         (void*) &source[ i ],
-                         TNL::min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
-                         cudaMemcpyDeviceToHost ) != cudaSuccess )
-            std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-         TNL_CHECK_CUDA_DEVICE;
-         Index j( 0 );
-         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
-         {
-            destination[ i + j ] = buffer[ j ];
-            j++;
-         }
-         i += j;
-      }
-   }
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-bool
-ArrayOperations< Devices::Host, Devices::Cuda >::
-compare( const Element1* destination,
-         const Element2* source,
-         const Index size )
-{
-   if( size == 0 ) return true;
-   /***
-    * Here, destination is on host and source is on CUDA device.
-    */
-   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
-   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
-   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
-#ifdef HAVE_CUDA
-   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] };
-   Index compared( 0 );
-   while( compared < size )
-   {
-      Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() );
-      if( cudaMemcpy( (void*) host_buffer.get(),
-                      (void*) &source[ compared ],
-                      transfer * sizeof( Element2 ),
-                      cudaMemcpyDeviceToHost ) != cudaSuccess )
-         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-      TNL_CHECK_CUDA_DEVICE;
-      if( ! ArrayOperations< Devices::Host >::compare( &destination[ compared ], host_buffer.get(), transfer ) )
-         return false;
-      compared += transfer;
-   }
-   return true;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-/****
- * Operations Host -> CUDA
- */
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-void
-ArrayOperations< Devices::Cuda, Devices::Host >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   if( size == 0 ) return;
-   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
-   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
-   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
-#ifdef HAVE_CUDA
-   if( std::is_same< DestinationElement, SourceElement >::value )
-   {
-      if( cudaMemcpy( destination,
-                      source,
-                      size * sizeof( DestinationElement ),
-                      cudaMemcpyHostToDevice ) != cudaSuccess )
-         std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
-      TNL_CHECK_CUDA_DEVICE;
-   }
-   else
-   {
-      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] };
-      Index i( 0 );
-      while( i < size )
-      {
-         Index j( 0 );
-         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
-         {
-            buffer[ j ] = source[ i + j ];
-            j++;
-         }
-         if( cudaMemcpy( (void*) &destination[ i ],
-                         (void*) buffer.get(),
-                         j * sizeof( DestinationElement ),
-                         cudaMemcpyHostToDevice ) != cudaSuccess )
-            std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
-         TNL_CHECK_CUDA_DEVICE;
-         i += j;
-      }
-   }
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-bool
-ArrayOperations< Devices::Cuda, Devices::Host >::
-compare( const Element1* hostData,
-         const Element2* deviceData,
-         const Index size )
-{
-   if( size == 0 ) return true;
-   TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." );
-   TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." );
-   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
-   return ArrayOperations< Devices::Host, Devices::Cuda >::compare( deviceData, hostData, size );
-}
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp
deleted file mode 100644
index 4113bbcd90f0edce53d143cf65996a392c2a91b4..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp
+++ /dev/null
@@ -1,429 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsMIC_impl.h  -  description
-                             -------------------
-    begin                : Mar 4, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Vit Hanousek
-
-#pragma once
-
-#include <iostream>
-
-#include <TNL/Math.h>
-#include <TNL/Exceptions/MICSupportMissing.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-#include <TNL/Exceptions/NotImplementedError.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-static constexpr std::size_t MIC_STACK_VAR_LIM = 5*1024*1024;
-
-template< typename Element >
-void
-ArrayOperations< Devices::MIC >::
-setElement( Element* data,
-            const Element& value )
-{
-   TNL_ASSERT( data, );
-   ArrayOperations< Devices::MIC >::set( data, value, 1 );
-}
-
-template< typename Element >
-Element
-ArrayOperations< Devices::MIC >::
-getElement( const Element* data )
-{
-   TNL_ASSERT( data, );
-   Element result;
-   ArrayOperations< Devices::Host, Devices::MIC >::copy< Element, Element, int >( &result, data, 1 );
-   return result;
-}
-
-template< typename Element, typename Index >
-void
-ArrayOperations< Devices::MIC >::
-set( Element* data,
-     const Element& value,
-     const Index size )
-{
-   TNL_ASSERT( data, );
-#ifdef HAVE_MIC
-   Element tmp=value;
-   Devices::MICHider<Element> hide_ptr;
-   hide_ptr.pointer=data;
-   #pragma offload target(mic) in(hide_ptr,tmp,size)
-   {
-       Element * dst= hide_ptr.pointer;
-       for(int i=0;i<size;i++)
-           dst[i]=tmp;
-   }
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-void
-ArrayOperations< Devices::MIC >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   #ifdef HAVE_MIC
-      if( std::is_same< DestinationElement, SourceElement >::value )
-      {
-         Devices::MICHider<void> src_ptr;
-         src_ptr.pointer=(void*)source;
-         Devices::MICHider<void> dst_ptr;
-         dst_ptr.pointer=(void*)destination;
-         #pragma offload target(mic) in(src_ptr,dst_ptr,size)
-         {
-             memcpy(dst_ptr.pointer,src_ptr.pointer,size*sizeof(DestinationElement));
-         }
-      }
-      else
-      {
-         Devices::MICHider<const SourceElement> src_ptr;
-         src_ptr.pointer=source;
-         Devices::MICHider<DestinationElement> dst_ptr;
-         dst_ptr.pointer=destination;
-         #pragma offload target(mic) in(src_ptr,dst_ptr,size)
-         {
-             for(int i=0;i<size;i++)
-                 dst_ptr.pointer[i]=src_ptr.pointer[i];
-         }
-      }
-   #else
-      throw Exceptions::MICSupportMissing();
-   #endif
-}
-
-template< typename DestinationElement,
-          typename Index,
-          typename SourceIterator >
-void
-ArrayOperations< Devices::MIC >::
-copyFromIterator( DestinationElement* destination,
-                  Index destinationSize,
-                  SourceIterator first,
-                  SourceIterator last )
-{
-   throw Exceptions::NotImplementedError();
-}
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-bool
-ArrayOperations< Devices::MIC >::
-compare( const Element1* destination,
-         const Element2* source,
-         const Index size )
-{
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-#ifdef HAVE_MIC
-   if( std::is_same< Element1, Element2 >::value )
-   {
-      Devices::MICHider<void> src_ptr;
-      src_ptr.pointer=(void*)source;
-      Devices::MICHider<void> dst_ptr;
-      dst_ptr.pointer=(void*)destination;
-      int ret=0;
-      #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret)
-      {
-          ret=memcmp(dst_ptr.pointer,src_ptr.pointer,size*sizeof(Element1));
-      }
-      if(ret==0)
-          return true;
-   }
-   else
-   {
-      Devices::MICHider<const Element2> src_ptr;
-      src_ptr.pointer=source;
-      Devices::MICHider<const Element1> dst_ptr;
-      dst_ptr.pointer=destination;
-      bool ret=false;
-      #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret)
-      {
-          int i=0;
-          for(i=0;i<size;i++)
-              if(dst_ptr.pointer[i]!=src_ptr.pointer[i])
-                  break;
-          if(i==size)
-              ret=true;
-          else
-              ret=false;
-      }
-      return ret;
-   }
-   return false;
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-template< typename Element,
-          typename Index >
-bool
-ArrayOperations< Devices::MIC >::
-containsValue( const Element* data,
-               const Index size,
-               const Element& value )
-{
-   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
-   TNL_ASSERT_GE( size, 0, "" );
-#ifdef HAVE_MIC
-   throw Exceptions::NotImplementedError();
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-template< typename Element,
-          typename Index >
-bool
-ArrayOperations< Devices::MIC >::
-containsOnlyValue( const Element* data,
-                   const Index size,
-                   const Element& value )
-{
-   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
-   TNL_ASSERT_GE( size, 0, "" );
-#ifdef HAVE_MIC
-   throw Exceptions::NotImplementedError();
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-
-
-/****
- * Operations MIC -> Host
- */
-
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-void
-ArrayOperations< Devices::Host, Devices::MIC >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-#ifdef HAVE_MIC
-   if( std::is_same< DestinationElement, SourceElement >::value )
-   {
-      Devices::MICHider<void> src_ptr;
-      src_ptr.pointer=(void*)source;
-
-      //JAKA KONSTANTA se vejde do stacku 5MB?
-      if(size<MIC_STACK_VAR_LIM)
-      {
-         uint8_t tmp[size*sizeof(SourceElement)];
-
-         #pragma offload target(mic) in(src_ptr,size) out(tmp)
-         {
-              memcpy((void*)&tmp,src_ptr.pointer,size*sizeof(SourceElement));
-         }
-
-         memcpy((void*)destination,(void*)&tmp,size*sizeof(SourceElement));
-      }
-      else
-      {
-          //direct -- pomalejší
-          uint8_t* tmp=(uint8_t*)destination;
-          #pragma offload target(mic) in(src_ptr,size) out(tmp:length(size))
-          {
-              memcpy((void*)tmp,src_ptr.pointer,size*sizeof(SourceElement));
-          }
-      }
-   }
-   else
-   {
-      Devices::MICHider<const SourceElement> src_ptr;
-      src_ptr.pointer=source;
-
-      if(size<MIC_STACK_VAR_LIM)
-      {
-         uint8_t tmp[size*sizeof(DestinationElement)];
-
-         #pragma offload target(mic) in(src_ptr,size) out(tmp)
-         {
-              DestinationElement *dst=(DestinationElement*)&tmp;
-              for(int i=0;i<size;i++)
-                  dst[i]=src_ptr.pointer[i];
-         }
-
-         memcpy((void*)destination,(void*)&tmp,size*sizeof(DestinationElement));
-      }
-      else
-      {
-          //direct pseudo heap-- pomalejší
-          uint8_t* tmp=(uint8_t*)destination;
-          #pragma offload target(mic) in(src_ptr,size) out(tmp:length(size*sizeof(DestinationElement)))
-          {
-              DestinationElement *dst=(DestinationElement*)tmp;
-              for(int i=0;i<size;i++)
-                  dst[i]=src_ptr.pointer[i];
-          }
-      }
-   }
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-bool
-ArrayOperations< Devices::Host, Devices::MIC >::
-compare( const Element1* destination,
-         const Element2* source,
-         const Index size )
-{
-   /***
-    * Here, destination is on host and source is on MIC device.
-    */
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
-#ifdef HAVE_MIC
-   Index compared( 0 );
-   Index transfer( 0 );
-   std::size_t max_transfer=MIC_STACK_VAR_LIM/sizeof(Element2);
-   uint8_t host_buffer[max_transfer*sizeof(Element2)];
-
-   Devices::MICHider<const Element2> src_ptr;
-
-   while( compared < size )
-   {
-     transfer=min(size-compared,max_transfer);
-     src_ptr.pointer=source+compared;
-     #pragma offload target(mic) out(host_buffer) in(src_ptr,transfer)
-     {
-         memcpy((void*)&host_buffer,(void*)src_ptr.pointer,transfer*sizeof(Element2));
-     }
-     if( ! ArrayOperations< Devices::Host >::compare( &destination[ compared ], (Element2*)&host_buffer, transfer ) )
-     {
-        return false;
-     }
-     compared += transfer;
-   }
-   return true;
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-/****
- * Operations Host -> MIC
- */
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-void
-ArrayOperations< Devices::MIC, Devices::Host >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
-#ifdef HAVE_MIC
-   if( std::is_same< DestinationElement, SourceElement >::value )
-   {
-      Devices::MICHider<void> dst_ptr;
-      dst_ptr.pointer=(void*)destination;
-
-      //JAKA KONSTANTA se vejde do stacku 5MB?
-      if(size<MIC_STACK_VAR_LIM)
-      {
-         uint8_t tmp[size*sizeof(SourceElement)];
-         memcpy((void*)&tmp,(void*)source,size*sizeof(SourceElement));
-
-         #pragma offload target(mic) in(dst_ptr,tmp,size)
-         {
-              memcpy(dst_ptr.pointer,(void*)&tmp,size*sizeof(SourceElement));
-         }
-      }
-      else
-      {
-          //direct pseudo heap-- pomalejší
-          uint8_t* tmp=(uint8_t*)source;
-          #pragma offload target(mic) in(dst_ptr,size) in(tmp:length(size))
-          {
-              memcpy(dst_ptr.pointer,(void*)tmp,size*sizeof(SourceElement));
-          }
-      }
-   }
-   else
-   {
-      Devices::MICHider<DestinationElement> dst_ptr;
-      dst_ptr.pointer=destination;
-
-      if(size<MIC_STACK_VAR_LIM)
-      {
-         uint8_t tmp[size*sizeof(SourceElement)];
-         memcpy((void*)&tmp,(void*)source,size*sizeof(SourceElement));
-
-         #pragma offload target(mic) in(dst_ptr,size,tmp)
-         {
-              SourceElement *src=(SourceElement*)&tmp;
-              for(int i=0;i<size;i++)
-                  dst_ptr.pointer[i]=src[i];
-         }
-      }
-      else
-      {
-          //direct pseudo heap-- pomalejší
-          uint8_t* tmp=(uint8_t*)source;
-          #pragma offload target(mic) in(dst_ptr,size) in(tmp:length(size*sizeof(SourceElement)))
-          {
-              SourceElement *src=(SourceElement*)tmp;
-              for(int i=0;i<size;i++)
-                  dst_ptr.pointer[i]=src[i];
-          }
-      }
-   }
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-bool
-ArrayOperations< Devices::MIC, Devices::Host >::
-compare( const Element1* hostData,
-         const Element2* deviceData,
-         const Index size )
-{
-   TNL_ASSERT( hostData, );
-   TNL_ASSERT( deviceData, );
-   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
-   return ArrayOperations< Devices::Host, Devices::MIC >::compare( deviceData, hostData, size );
-}
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp
deleted file mode 100644
index d84933bde61f61c10108c47e13ba994b3b1709b4..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsStatic.hpp  -  description
-                             -------------------
-    begin                : Apr 8, 2019
-    copyright            : (C) 2019 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-template< typename Element >
-__cuda_callable__
-void
-ArrayOperations< void >::
-setElement( Element* data,
-            const Element& value )
-{
-   *data = value;
-}
-
-template< typename Element >
-__cuda_callable__
-Element
-ArrayOperations< void >::
-getElement( const Element* data )
-{
-   return *data;
-}
-
-template< typename Element, typename Index >
-__cuda_callable__
-void
-ArrayOperations< void >::
-set( Element* data,
-     const Element& value,
-     const Index size )
-{
-   for( Index i = 0; i < size; i ++ )
-      data[ i ] = value;
-}
-
-template< typename DestinationElement,
-          typename SourceElement,
-          typename Index >
-__cuda_callable__
-void
-ArrayOperations< void >::
-copy( DestinationElement* destination,
-      const SourceElement* source,
-      const Index size )
-{
-   for( Index i = 0; i < size; i ++ )
-      destination[ i ] = source[ i ];
-}
-
-template< typename Element1,
-          typename Element2,
-          typename Index >
-__cuda_callable__
-bool
-ArrayOperations< void >::
-compare( const Element1* destination,
-         const Element2* source,
-         const Index size )
-{
-   for( Index i = 0; i < size; i++ )
-      if( ! ( destination[ i ] == source[ i ] ) )
-         return false;
-   return true;
-}
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index d9a5e56d2291a1b8b9e6a13f06121c0203e859a0..45ef1e272e8affa96e6a77b5b9e74cec8a59b447 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -73,7 +73,6 @@ template< typename Value,
 class Array
 {
    public:
-
       /**
        * \brief Type of elements stored in this array.
        */
@@ -98,16 +97,6 @@ class Array
        */
       using AllocatorType = Allocator;
 
-      /**
-       * \brief Defines the same array type but allocated on host (CPU).
-       */
-      using HostType = Array< Value, TNL::Devices::Host, Index >;
-
-      /**
-       * \brief Defines the same array type but allocated on CUDA device (GPU).
-       */
-      using CudaType = Array< Value, TNL::Devices::Cuda, Index >;
-
       /**
        * \brief Compatible ArrayView type.
        */
@@ -118,6 +107,15 @@ class Array
        */
       using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >;
 
+      /**
+       * \brief A template which allows to quickly obtain an \ref Array type with changed template parameters.
+       */
+      template< typename _Value,
+                typename _Device = Device,
+                typename _Index = Index,
+                typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Value > >
+      using Self = Array< _Value, _Device, _Index, _Allocator >;
+
 
       /**
        * \brief Constructs an empty array with zero size.
@@ -226,25 +224,15 @@ class Array
        */
       AllocatorType getAllocator() const;
 
-      /**
-       * \brief Returns a \ref String representation of the array type in C++ style.
-       */
-      static String getType();
-
-      /**
-       * \brief Returns a \ref String representation of the array type in C++ style.
-       */
-      virtual String getTypeVirtual() const;
-
       /**
        * \brief Returns a \ref String representation of the array type in C++ style,
-       * where device is always \ref Devices::Host.
+       * with a placeholder in place of \e Device and \e Allocator.
        */
       static String getSerializationType();
 
       /**
        * \brief Returns a \ref String representation of the array type in C++ style,
-       * where device is always \ref Devices::Host.
+       * with a placeholder in place of \e Device and \e Allocator.
        */
       virtual String getSerializationTypeVirtual() const;
 
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 4a9c484a4c19d84d30ef5d6760fad0362b64d42b..24e3f8b43a024c8c8c3b87213a31886c595caceb 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -15,10 +15,9 @@
 
 #include <TNL/Assert.h>
 #include <TNL/Math.h>
-#include <TNL/param-types.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/ArrayIO.h>
-#include <TNL/Containers/Algorithms/ArrayAssignment.h>
+#include <TNL/TypeInfo.h>
+#include <TNL/Containers/detail/ArrayIO.h>
+#include <TNL/Containers/detail/ArrayAssignment.h>
 
 #include "Array.h"
 
@@ -74,7 +73,7 @@ Array( Value* data,
 : allocator( allocator )
 {
    this->setSize( size );
-   Algorithms::ArrayOperations< Device >::copy( this->getData(), data, size );
+   Algorithms::MemoryOperations< Device >::copy( this->getData(), data, size );
 }
 
 template< typename Value,
@@ -85,7 +84,7 @@ Array< Value, Device, Index, Allocator >::
 Array( const Array< Value, Device, Index, Allocator >& array )
 {
    this->setSize( array.getSize() );
-   Algorithms::ArrayOperations< Device >::copy( this->getData(), array.getData(), array.getSize() );
+   Algorithms::MemoryOperations< Device >::copy( this->getData(), array.getData(), array.getSize() );
 }
 
 template< typename Value,
@@ -98,7 +97,7 @@ Array( const Array< Value, Device, Index, Allocator >& array,
 : allocator( allocator )
 {
    this->setSize( array.getSize() );
-   Algorithms::ArrayOperations< Device >::copy( this->getData(), array.getData(), array.getSize() );
+   Algorithms::MemoryOperations< Device >::copy( this->getData(), array.getData(), array.getSize() );
 }
 
 template< typename Value,
@@ -118,7 +117,7 @@ Array( const Array< Value, Device, Index, Allocator >& array,
    TNL_ASSERT_LE( begin + size, array.getSize(), "End of array is out of bounds." );
 
    this->setSize( size );
-   Algorithms::ArrayOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size );
+   Algorithms::MemoryOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size );
 }
 
 template< typename Value,
@@ -135,7 +134,7 @@ Array( const std::initializer_list< InValue >& list,
    // Here we assume that the underlying array for std::initializer_list is
    // const T[N] as noted here:
    // https://en.cppreference.com/w/cpp/utility/initializer_list
-   Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), &( *list.begin() ), list.size() );
+   Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), &( *list.begin() ), list.size() );
 }
 
 template< typename Value,
@@ -149,7 +148,7 @@ Array( const std::list< InValue >& list,
 : allocator( allocator )
 {
    this->setSize( list.size() );
-   Algorithms::ArrayOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() );
+   Algorithms::MemoryOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() );
 }
 
 template< typename Value,
@@ -163,7 +162,7 @@ Array( const std::vector< InValue >& vector,
 : allocator( allocator )
 {
    this->setSize( vector.size() );
-   Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() );
+   Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() );
 }
 
 template< typename Value,
@@ -177,31 +176,6 @@ getAllocator() const
    return allocator;
 }
 
-template< typename Value,
-          typename Device,
-          typename Index,
-          typename Allocator >
-String
-Array< Value, Device, Index, Allocator >::
-getType()
-{
-   return String( "Containers::Array< " ) +
-          TNL::getType< Value >() + ", " +
-          Device::getDeviceType() + ", " +
-          TNL::getType< Index >() + " >";
-}
-
-template< typename Value,
-          typename Device,
-          typename Index,
-          typename Allocator >
-String
-Array< Value, Device, Index, Allocator >::
-getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Value,
           typename Device,
           typename Index,
@@ -210,7 +184,7 @@ String
 Array< Value, Device, Index, Allocator >::
 getSerializationType()
 {
-   return Algorithms::ArrayIO< Value, Device, Index >::getSerializationType();
+   return detail::ArrayIO< Value, Index, Allocator >::getSerializationType();
 }
 
 template< typename Value,
@@ -510,7 +484,7 @@ setElement( const Index& i, const Value& x )
 {
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-   return Algorithms::ArrayOperations< Device >::setElement( &( this->data[ i ] ), x );
+   return Algorithms::MemoryOperations< Device >::setElement( &( this->data[ i ] ), x );
 }
 
 template< typename Value,
@@ -523,7 +497,7 @@ getElement( const Index& i ) const
 {
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-   return Algorithms::ArrayOperations< Device >::getElement( & ( this->data[ i ] ) );
+   return Algorithms::MemoryOperations< Device >::getElement( & ( this->data[ i ] ) );
 }
 
 template< typename Value,
@@ -566,7 +540,7 @@ operator=( const Array< Value, Device, Index, Allocator >& array )
    if( this->getSize() != array.getSize() )
       this->setLike( array );
    if( this->getSize() > 0 )
-      Algorithms::ArrayOperations< Device >::
+      Algorithms::MemoryOperations< Device >::
          copy( this->getData(),
                      array.getData(),
                      array.getSize() );
@@ -605,8 +579,8 @@ Array< Value, Device, Index, Allocator >&
 Array< Value, Device, Index, Allocator >::
 operator=( const T& data )
 {
-   Algorithms::ArrayAssignment< Array, T >::resize( *this, data );
-   Algorithms::ArrayAssignment< Array, T >::assign( *this, data );
+   detail::ArrayAssignment< Array, T >::resize( *this, data );
+   detail::ArrayAssignment< Array, T >::assign( *this, data );
    return *this;
 }
 
@@ -620,7 +594,7 @@ Array< Value, Device, Index, Allocator >::
 operator=( const std::list< InValue >& list )
 {
    this->setSize( list.size() );
-   Algorithms::ArrayOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() );
+   Algorithms::MemoryOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() );
    return *this;
 }
 
@@ -635,7 +609,7 @@ operator=( const std::vector< InValue >& vector )
 {
    if( (std::size_t) this->getSize() != vector.size() )
       this->setSize( vector.size() );
-   Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() );
+   Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() );
    return *this;
 }
 
@@ -652,7 +626,7 @@ operator==( const ArrayT& array ) const
       return false;
    if( this->getSize() == 0 )
       return true;
-   return Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >::
+   return Algorithms::MultiDeviceMemoryOperations< Device, typename ArrayT::DeviceType >::
             compare( this->getData(),
                            array.getData(),
                            array.getSize() );
@@ -683,7 +657,7 @@ setValue( const ValueType& v,
    TNL_ASSERT_TRUE( this->getData(), "Attempted to set a value of an empty array." );
    if( end == 0 )
       end = this->getSize();
-   Algorithms::ArrayOperations< Device >::set( &this->getData()[ begin ], v, end - begin );
+   Algorithms::MemoryOperations< Device >::set( &this->getData()[ begin ], v, end - begin );
 }
 
 template< typename Value,
@@ -715,7 +689,7 @@ containsValue( const ValueType& v,
    if( end == 0 )
       end = this->getSize();
 
-   return Algorithms::ArrayOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, v );
+   return Algorithms::MemoryOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, v );
 }
 
 template< typename Value,
@@ -732,7 +706,7 @@ containsOnlyValue( const ValueType& v,
    if( end == 0 )
       end = this->getSize();
 
-   return Algorithms::ArrayOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, v );
+   return Algorithms::MemoryOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, v );
 }
 
 template< typename Value,
@@ -785,7 +759,7 @@ std::ostream& operator<<( std::ostream& str, const Array< Value, Device, Index,
 template< typename Value, typename Device, typename Index, typename Allocator >
 File& operator<<( File& file, const Array< Value, Device, Index, Allocator >& array )
 {
-   using IO = Algorithms::ArrayIO< Value, Device, Index >;
+   using IO = detail::ArrayIO< Value, Index, Allocator >;
    saveObjectType( file, IO::getSerializationType() );
    const Index size = array.getSize();
    file.save( &size );
@@ -804,7 +778,7 @@ File& operator<<( File&& file, const Array< Value, Device, Index, Allocator >& a
 template< typename Value, typename Device, typename Index, typename Allocator >
 File& operator>>( File& file, Array< Value, Device, Index, Allocator >& array )
 {
-   using IO = Algorithms::ArrayIO< Value, Device, Index >;
+   using IO = detail::ArrayIO< Value, Index, Allocator >;
    const String type = getObjectType( file );
    if( type != IO::getSerializationType() )
       throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." );
diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index 066ada8f248baf20e94c3cf9c1f91e412ed9acc2..d51f151f772f3828dc7ad27ca13041d01730ce76 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -80,16 +80,6 @@ public:
     */
    using IndexType = Index;
 
-   /**
-    * \brief Defines the same array type but allocated on host (CPU).
-    */
-   using HostType = ArrayView< Value, TNL::Devices::Host, Index >;
-
-   /**
-    * \brief Defines the same array type but allocated on CUDA device (GPU).
-    */
-   using CudaType = ArrayView< Value, TNL::Devices::Cuda, Index >;
-
    /**
     * \brief Compatible ArrayView type.
     */
@@ -101,9 +91,13 @@ public:
    using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >;
 
    /**
-    * \brief Returns a \ref String representation of the array view type.
+    * \brief A template which allows to quickly obtain an \ref ArrayView type with changed template parameters.
     */
-   static String getType();
+   template< typename _Value,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = ArrayView< _Value, _Device, _Index >;
+
 
    /**
     * \brief Constructs an empty array view.
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index ea7882516cd13647e66e971dbbec11179c1ea520..c3c39bc10be8dd846331d1086fc1d22b42b8c6c7 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -13,30 +13,19 @@
 #include <iostream>
 #include <stdexcept>
 
-#include <TNL/param-types.h>
-#include <TNL/ParallelFor.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Containers/Algorithms/ArrayIO.h>
-#include <TNL/Containers/Algorithms/ArrayAssignment.h>
+#include <TNL/TypeInfo.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/MemoryOperations.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
+#include <TNL/Containers/detail/ArrayIO.h>
+#include <TNL/Containers/detail/ArrayAssignment.h>
+#include <TNL/Allocators/Default.h>
 
 #include "ArrayView.h"
 
 namespace TNL {
 namespace Containers {
 
-template< typename Value,
-          typename Device,
-          typename Index >
-String
-ArrayView< Value, Device, Index >::
-getType()
-{
-   return String( "Containers::ArrayView< " ) + ", " +
-                  TNL::getType< Value >() + ", " +
-                  Device::getDeviceType() + ", " +
-                  TNL::getType< Index >() + " >";
-}
-
 // explicit initialization by raw data pointer and size
 template< typename Value,
           typename Device,
@@ -113,7 +102,7 @@ operator=( const ArrayView& view )
 {
    TNL_ASSERT_EQ( getSize(), view.getSize(), "The sizes of the array views must be equal, views are not resizable." );
    if( getSize() > 0 )
-      Algorithms::ArrayOperations< Device >::copy( getData(), view.getData(), getSize() );
+      Algorithms::MemoryOperations< Device >::copy( getData(), view.getData(), getSize() );
    return *this;
 }
 
@@ -125,7 +114,7 @@ ArrayView< Value, Device, Index >&
 ArrayView< Value, Device, Index >::
 operator=( const T& data )
 {
-   Algorithms::ArrayAssignment< ArrayView, T >::assign( *this, data );
+   detail::ArrayAssignment< ArrayView, T >::assign( *this, data );
    return *this;
 }
 
@@ -228,7 +217,7 @@ setElement( Index i, Value value )
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-   return Algorithms::ArrayOperations< Device >::setElement( &data[ i ], value );
+   return Algorithms::MemoryOperations< Device >::setElement( &data[ i ], value );
 }
 
 template< typename Value,
@@ -240,7 +229,7 @@ getElement( Index i ) const
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-   return Algorithms::ArrayOperations< Device >::getElement( &data[ i ] );
+   return Algorithms::MemoryOperations< Device >::getElement( &data[ i ] );
 }
 
 template< typename Value,
@@ -280,7 +269,7 @@ operator==( const ArrayT& array ) const
       return false;
    if( this->getSize() == 0 )
       return true;
-   return Algorithms::ArrayOperations< DeviceType, typename ArrayT::DeviceType >::
+   return Algorithms::MultiDeviceMemoryOperations< DeviceType, typename ArrayT::DeviceType >::
             compare( this->getData(),
                            array.getData(),
                            array.getSize() );
@@ -307,7 +296,7 @@ setValue( Value value, const Index begin, Index end )
    TNL_ASSERT_GT( size, 0, "Attempted to set value to an empty array view." );
    if( end == 0 )
       end = this->getSize();
-   Algorithms::ArrayOperations< Device >::set( &getData()[ begin ], value, end - begin );
+   Algorithms::MemoryOperations< Device >::set( &getData()[ begin ], value, end - begin );
 }
 
 template< typename Value,
@@ -328,7 +317,7 @@ evaluate( const Function& f, const Index begin, Index end )
    if( end == 0 )
       end = this->getSize();
 
-   ParallelFor< DeviceType >::exec( begin, end, eval );
+   Algorithms::ParallelFor< DeviceType >::exec( begin, end, eval );
 }
 
 template< typename Value,
@@ -342,7 +331,7 @@ containsValue( Value value,
 {
    if( end == 0 )
       end = this->getSize();
-   return Algorithms::ArrayOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, value );
+   return Algorithms::MemoryOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, value );
 }
 
 template< typename Value,
@@ -356,7 +345,7 @@ containsOnlyValue( Value value,
 {
    if( end == 0 )
       end = this->getSize();
-   return Algorithms::ArrayOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, value );
+   return Algorithms::MemoryOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, value );
 }
 
 template< typename Value, typename Device, typename Index >
@@ -395,7 +384,7 @@ load( const String& fileName )
 template< typename Value, typename Device, typename Index >
 File& operator<<( File& file, const ArrayView< Value, Device, Index > view )
 {
-   using IO = Algorithms::ArrayIO< Value, Device, Index >;
+   using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >;
    saveObjectType( file, IO::getSerializationType() );
    const Index size = view.getSize();
    file.save( &size );
@@ -414,7 +403,7 @@ File& operator<<( File&& file, const ArrayView< Value, Device, Index > view )
 template< typename Value, typename Device, typename Index >
 File& operator>>( File& file, ArrayView< Value, Device, Index > view )
 {
-   using IO = Algorithms::ArrayIO< Value, Device, Index >;
+   using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >;
    const String type = getObjectType( file );
    if( type != IO::getSerializationType() )
       throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." );
diff --git a/src/TNL/Containers/DistributedArray.h b/src/TNL/Containers/DistributedArray.h
index 7f53c724a337ba9052520c250fbafa12c613c5f6..ce4e9ce5e15c6c110c117e984dc5a2e8ee26da67 100644
--- a/src/TNL/Containers/DistributedArray.h
+++ b/src/TNL/Containers/DistributedArray.h
@@ -35,11 +35,19 @@ public:
    using LocalRangeType = Subrange< Index >;
    using LocalViewType = Containers::ArrayView< Value, Device, Index >;
    using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >;
-   using HostType = DistributedArray< Value, Devices::Host, Index, Communicator >;
-   using CudaType = DistributedArray< Value, Devices::Cuda, Index, Communicator >;
    using ViewType = DistributedArrayView< Value, Device, Index, Communicator >;
    using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index, Communicator >;
 
+   /**
+    * \brief A template which allows to quickly obtain a \ref DistributedArray type with changed template parameters.
+    */
+   template< typename _Value,
+             typename _Device = Device,
+             typename _Index = Index,
+             typename _Communicator = Communicator >
+   using Self = DistributedArray< _Value, _Device, _Index, _Communicator >;
+
+
    DistributedArray() = default;
 
    DistributedArray( DistributedArray& ) = default;
@@ -83,13 +91,6 @@ public:
    void copyFromGlobal( ConstLocalViewType globalArray );
 
 
-   static String getType();
-
-   virtual String getTypeVirtual() const;
-
-   // TODO: no getSerializationType method until there is support for serialization
-
-
    // Usual Array methods follow below.
 
    /**
diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp
index b8c65552785ba857563436d89ba6611a0e777b6f..c146bbf9f8657e6af5f38a8506d9c944a539c57a 100644
--- a/src/TNL/Containers/DistributedArray.hpp
+++ b/src/TNL/Containers/DistributedArray.hpp
@@ -14,7 +14,7 @@
 
 #include "DistributedArray.h"
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Communicators/MpiDefs.h>  // important only when MPI is disabled
 
 namespace TNL {
@@ -110,7 +110,7 @@ copyFromGlobal( ConstLocalViewType globalArray )
       localView[ i ] = globalArray[ localRange.getGlobalIndex( i ) ];
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel );
 }
 
 
@@ -160,33 +160,6 @@ operator ConstViewType() const
    return getConstView();
 }
 
-template< typename Value,
-          typename Device,
-          typename Index,
-          typename Communicator >
-String
-DistributedArray< Value, Device, Index, Communicator >::
-getType()
-{
-   return String( "Containers::DistributedArray< " ) +
-          TNL::getType< Value >() + ", " +
-          Device::getDeviceType() + ", " +
-          TNL::getType< Index >() + ", " +
-          // TODO: communicators don't have a getType method
-          "<Communicator> >";
-}
-
-template< typename Value,
-          typename Device,
-          typename Index,
-          typename Communicator >
-String
-DistributedArray< Value, Device, Index, Communicator >::
-getTypeVirtual() const
-{
-   return getType();
-}
-
 template< typename Value,
           typename Device,
           typename Index,
diff --git a/src/TNL/Containers/DistributedArrayView.h b/src/TNL/Containers/DistributedArrayView.h
index 82a662e39cebecc50735444888ba4c065a5a4287..6022521bc66bc41df4d144a5651a934a52c5e158 100644
--- a/src/TNL/Containers/DistributedArrayView.h
+++ b/src/TNL/Containers/DistributedArrayView.h
@@ -34,11 +34,19 @@ public:
    using LocalRangeType = Subrange< Index >;
    using LocalViewType = Containers::ArrayView< Value, Device, Index >;
    using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >;
-   using HostType = DistributedArrayView< Value, Devices::Host, Index, Communicator >;
-   using CudaType = DistributedArrayView< Value, Devices::Cuda, Index, Communicator >;
    using ViewType = DistributedArrayView< Value, Device, Index, Communicator >;
    using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index, Communicator >;
 
+   /**
+    * \brief A template which allows to quickly obtain a \ref DistributedArrayView type with changed template parameters.
+    */
+   template< typename _Value,
+             typename _Device = Device,
+             typename _Index = Index,
+             typename _Communicator = Communicator >
+   using Self = DistributedArrayView< _Value, _Device, _Index, _Communicator >;
+
+
    // Initialization by raw data
    __cuda_callable__
    DistributedArrayView( const LocalRangeType& localRange, IndexType globalSize, CommunicationGroup group, LocalViewType localData )
@@ -108,9 +116,6 @@ public:
    void copyFromGlobal( ConstLocalViewType globalArray );
 
 
-   static String getType();
-
-
    /*
     * Usual ArrayView methods follow below.
     */
diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp
index 5cb9c10ed0e5ce4155f9b48f20226baa9106e1a1..0199229d48cab585b78d6618437d9fbcf275092a 100644
--- a/src/TNL/Containers/DistributedArrayView.hpp
+++ b/src/TNL/Containers/DistributedArrayView.hpp
@@ -180,24 +180,7 @@ copyFromGlobal( ConstLocalViewType globalArray )
       localView[ i ] = globalArray[ localRange.getGlobalIndex( i ) ];
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel );
-}
-
-
-template< typename Value,
-          typename Device,
-          typename Index,
-          typename Communicator >
-String
-DistributedArrayView< Value, Device, Index, Communicator >::
-getType()
-{
-   return String( "Containers::DistributedArrayView< " ) +
-          TNL::getType< Value >() + ", " +
-          Device::getDeviceType() + ", " +
-          TNL::getType< Index >() + ", " +
-          // TODO: communicators don't have a getType method
-          "<Communicator> >";
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel );
 }
 
 
diff --git a/src/TNL/Containers/DistributedNDArray.h b/src/TNL/Containers/DistributedNDArray.h
index 4b123d114faa37e9022d7b5caab6f9c7124c2263..57b94a34b1bd7c210d24462aa1859cc68f087f15 100644
--- a/src/TNL/Containers/DistributedNDArray.h
+++ b/src/TNL/Containers/DistributedNDArray.h
@@ -392,7 +392,7 @@ public:
    void allocate()
    {
       SizesHolderType localSizes;
-      TemplateStaticFor< std::size_t, 0, SizesHolderType::getDimension(), LocalSizesSetter >::execHost( localSizes, globalSizes, localBegins, localEnds );
+      Algorithms::TemplateStaticFor< std::size_t, 0, SizesHolderType::getDimension(), LocalSizesSetter >::execHost( localSizes, globalSizes, localBegins, localEnds );
       localArray.setSize( localSizes );
    }
 
diff --git a/src/TNL/Containers/DistributedNDArraySynchronizer.h b/src/TNL/Containers/DistributedNDArraySynchronizer.h
index e6e41ba3338010779a1b110af90e198cdae617aa..6985303785f78e931303d2bddfa51407f4cc7ebc 100644
--- a/src/TNL/Containers/DistributedNDArraySynchronizer.h
+++ b/src/TNL/Containers/DistributedNDArraySynchronizer.h
@@ -51,7 +51,7 @@ public:
          array_view.bind( array.getView() );
 
          // allocate buffers
-         TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), AllocateHelper >::execHost( buffers, array_view );
+         Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), AllocateHelper >::execHost( buffers, array_view );
       }
       else {
          // only bind to the actual data
@@ -80,18 +80,18 @@ protected:
       #endif
 
       // fill send buffers
-      TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true );
+      Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true );
 
       // issue all send and receive async operations
       std::vector< typename Communicator::Request > requests;
       const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup();
-      TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group );
+      Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group );
 
       // wait until send is done
       Communicator::WaitAll( requests.data(), requests.size() );
 
       // copy data from receive buffers
-      TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false );
+      Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false );
    }
 
    template< std::size_t dim >
diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h
index 51d7c537c534d3b5a82178ccc2096f9cea1601f2..db4e46e68616244399e85fa7903ebe53a1e30585 100644
--- a/src/TNL/Containers/DistributedVector.h
+++ b/src/TNL/Containers/DistributedVector.h
@@ -34,11 +34,19 @@ public:
    using IndexType = Index;
    using LocalViewType = Containers::VectorView< Real, Device, Index >;
    using ConstLocalViewType = Containers::VectorView< std::add_const_t< Real >, Device, Index >;
-   using HostType = DistributedVector< Real, Devices::Host, Index, Communicator >;
-   using CudaType = DistributedVector< Real, Devices::Cuda, Index, Communicator >;
    using ViewType = DistributedVectorView< Real, Device, Index, Communicator >;
    using ConstViewType = DistributedVectorView< std::add_const_t< Real >, Device, Index, Communicator >;
 
+   /**
+    * \brief A template which allows to quickly obtain a \ref DistributedVector type with changed template parameters.
+    */
+   template< typename _Real,
+             typename _Device = Device,
+             typename _Index = Index,
+             typename _Communicator = Communicator >
+   using Self = DistributedVector< _Real, _Device, _Index, _Communicator >;
+
+
    // inherit all constructors and assignment operators from Array
    using BaseType::DistributedArray;
    using BaseType::operator=;
@@ -69,11 +77,6 @@ public:
    operator ConstViewType() const;
 
 
-   static String getType();
-
-   virtual String getTypeVirtual() const;
-
-
    /*
     * Usual Vector methods follow below.
     */
@@ -128,7 +131,7 @@ public:
    DistributedVector& operator/=( const Vector& vector );
 
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive >
-   void prefixSum( IndexType begin = 0, IndexType end = 0 );
+   void scan( IndexType begin = 0, IndexType end = 0 );
 };
 
 } // namespace Containers
diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp
index dbf8b10b8e2fe7714d16d7dcf399564abd01044d..fa49591e8ae53ffd06214772491c656b91601413 100644
--- a/src/TNL/Containers/DistributedVector.hpp
+++ b/src/TNL/Containers/DistributedVector.hpp
@@ -13,7 +13,7 @@
 #pragma once
 
 #include "DistributedVector.h"
-#include <TNL/Containers/Algorithms/DistributedScan.h>
+#include <TNL/Algorithms/DistributedScan.h>
 
 namespace TNL {
 namespace Containers {
@@ -83,34 +83,6 @@ operator ConstViewType() const
 }
 
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename Communicator >
-String
-DistributedVector< Real, Device, Index, Communicator >::
-getType()
-{
-   return String( "Containers::DistributedVector< " ) +
-          TNL::getType< Real >() + ", " +
-          Device::getDeviceType() + ", " +
-          TNL::getType< Index >() + ", " +
-          // TODO: communicators don't have a getType method
-          "<Communicator> >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename Communicator >
-String
-DistributedVector< Real, Device, Index, Communicator >::
-getTypeVirtual() const
-{
-   return getType();
-}
-
-
 /*
  * Usual Vector methods follow below.
  */
@@ -301,7 +273,7 @@ template< typename Real,
    template< Algorithms::ScanType Type >
 void
 DistributedVector< Real, Device, Index, Communicator >::
-prefixSum( IndexType begin, IndexType end )
+scan( IndexType begin, IndexType end )
 {
    if( end == 0 )
       end = this->getSize();
diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h
index 99764432de6488aa557f4922e4e3ad1c3dbcfe7e..70452c50d17b8a08004a3ad162005154eb228ba8 100644
--- a/src/TNL/Containers/DistributedVectorView.h
+++ b/src/TNL/Containers/DistributedVectorView.h
@@ -35,11 +35,19 @@ public:
    using IndexType = Index;
    using LocalViewType = Containers::VectorView< Real, Device, Index >;
    using ConstLocalViewType = Containers::VectorView< std::add_const_t< Real >, Device, Index >;
-   using HostType = DistributedVectorView< Real, Devices::Host, Index, Communicator >;
-   using CudaType = DistributedVectorView< Real, Devices::Cuda, Index, Communicator >;
    using ViewType = DistributedVectorView< Real, Device, Index, Communicator >;
    using ConstViewType = DistributedVectorView< std::add_const_t< Real >, Device, Index, Communicator >;
 
+   /**
+    * \brief A template which allows to quickly obtain a \ref DistributedVectorView type with changed template parameters.
+    */
+   template< typename _Real,
+             typename _Device = Device,
+             typename _Index = Index,
+             typename _Communicator = Communicator >
+   using Self = DistributedVectorView< _Real, _Device, _Index, _Communicator >;
+
+
    // inherit all constructors and assignment operators from ArrayView
    using BaseType::DistributedArrayView;
    using BaseType::operator=;
@@ -72,8 +80,6 @@ public:
    __cuda_callable__
    ConstViewType getConstView() const;
 
-   static String getType();
-
    /*
     * Usual Vector methods follow below.
     */
@@ -128,7 +134,7 @@ public:
    DistributedVectorView& operator/=( const Vector& vector );
 
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive >
-   void prefixSum( IndexType begin = 0, IndexType end = 0 );
+   void scan( IndexType begin = 0, IndexType end = 0 );
 };
 
 } // namespace Containers
diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp
index 6a934d8c25d2a786257785787f55adcafba79804..70f61979fd44fb8d3f9d1878eb2c4a6ecd5c169b 100644
--- a/src/TNL/Containers/DistributedVectorView.hpp
+++ b/src/TNL/Containers/DistributedVectorView.hpp
@@ -13,7 +13,7 @@
 #pragma once
 
 #include "DistributedVectorView.h"
-#include <TNL/Containers/Algorithms/DistributedScan.h>
+#include <TNL/Algorithms/DistributedScan.h>
 
 namespace TNL {
 namespace Containers {
@@ -64,22 +64,6 @@ getConstView() const
    return *this;
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename Communicator >
-String
-DistributedVectorView< Real, Device, Index, Communicator >::
-getType()
-{
-   return String( "Containers::DistributedVectorView< " ) +
-          TNL::getType< Real >() + ", " +
-          Device::getDeviceType() + ", " +
-          TNL::getType< Index >() + ", " +
-          // TODO: communicators don't have a getType method
-          "<Communicator> >";
-}
-
 
 /*
  * Usual Vector methods follow below.
@@ -277,7 +261,7 @@ template< typename Real,
    template< Algorithms::ScanType Type >
 void
 DistributedVectorView< Real, Device, Index, Communicator >::
-prefixSum( IndexType begin, IndexType end )
+scan( IndexType begin, IndexType end )
 {
    if( end == 0 )
       end = this->getSize();
diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h
index 616ad5807864a1b1f2cd7b5af765132021c99a24..98e39ad8c561240f2ac0522844dd362059ebf710 100644
--- a/src/TNL/Containers/Expressions/Comparison.h
+++ b/src/TNL/Containers/Expressions/Comparison.h
@@ -14,8 +14,8 @@
 
 #include <TNL/Assert.h>
 #include <TNL/Containers/Expressions/ExpressionVariableType.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 
 namespace TNL {
 namespace Containers {
@@ -45,7 +45,7 @@ struct VectorComparison< T1, T2, true >
          return false;
       if( a.getSize() == 0 )
          return true;
-      return Algorithms::ArrayOperations< typename T1::DeviceType, typename T2::DeviceType >::compare( a.getData(), b.getData(), a.getSize() );
+      return Algorithms::MultiDeviceMemoryOperations< typename T1::DeviceType, typename T2::DeviceType >::compare( a.getData(), b.getData(), a.getSize() );
    }
 };
 
diff --git a/src/TNL/Containers/Expressions/DistributedComparison.h b/src/TNL/Containers/Expressions/DistributedComparison.h
index 7a7d5c5bebf24fca09a2f1837561483d27f8f9b8..b5e0e96a9d756f55edb7bf15652ad1d0d34bc5d6 100644
--- a/src/TNL/Containers/Expressions/DistributedComparison.h
+++ b/src/TNL/Containers/Expressions/DistributedComparison.h
@@ -11,7 +11,6 @@
 #pragma once
 
 #include <TNL/Containers/Expressions/ExpressionVariableType.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
 #include <TNL/Communicators/MpiDefs.h>
 
 namespace TNL {
diff --git a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h
index fe8997aac8a244dfbafd14c3b705fc31abb6b6f7..355689039884e8e05caff040e232ffbcf26119ba 100644
--- a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h
+++ b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h
@@ -2207,7 +2207,7 @@ Result evaluateAndReduce( Vector& lhs,
 
    RealType* lhs_data = lhs.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 template< typename Vector,
@@ -2226,7 +2226,7 @@ Result evaluateAndReduce( Vector& lhs,
 
    RealType* lhs_data = lhs.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 ////
@@ -2252,7 +2252,7 @@ Result addAndReduce( Vector& lhs,
       lhs_data[ i ] += aux;
       return aux;
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 template< typename Vector,
@@ -2275,7 +2275,7 @@ Result addAndReduce( Vector& lhs,
       lhs_data[ i ] += aux;
       return aux;
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 ////
@@ -2301,7 +2301,7 @@ Result addAndReduceAbs( Vector& lhs,
       lhs_data[ i ] += aux;
       return TNL::abs( aux );
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 template< typename Vector,
@@ -2324,7 +2324,7 @@ Result addAndReduceAbs( Vector& lhs,
       lhs_data[ i ] += aux;
       return TNL::abs( aux );
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 } // namespace TNL
diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h
index 763bdbfd1e85545c5ddf420fc6725066c245b7de..a0980baf6cf8ee751444df7d27205dfa9260f593 100644
--- a/src/TNL/Containers/Expressions/ExpressionTemplates.h
+++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h
@@ -2130,7 +2130,7 @@ Result evaluateAndReduce( Vector& lhs,
 
    RealType* lhs_data = lhs.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 template< typename Vector,
@@ -2149,7 +2149,7 @@ Result evaluateAndReduce( Vector& lhs,
 
    RealType* lhs_data = lhs.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 ////
@@ -2175,7 +2175,7 @@ Result addAndReduce( Vector& lhs,
       lhs_data[ i ] += aux;
       return aux;
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 template< typename Vector,
@@ -2198,7 +2198,7 @@ Result addAndReduce( Vector& lhs,
       lhs_data[ i ] += aux;
       return aux;
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 ////
@@ -2224,7 +2224,7 @@ Result addAndReduceAbs( Vector& lhs,
       lhs_data[ i ] += aux;
       return TNL::abs( aux );
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 template< typename Vector,
@@ -2247,7 +2247,7 @@ Result addAndReduceAbs( Vector& lhs,
       lhs_data[ i ] += aux;
       return TNL::abs( aux );
    };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
+   return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero );
 }
 
 } // namespace TNL
diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h
index 29e904bbfafb5c338a523227bb9e226d4fda9970..84d362e8aab01b704cdefac44ae0d7e0e6a7150d 100644
--- a/src/TNL/Containers/Expressions/VerticalOperations.h
+++ b/src/TNL/Containers/Expressions/VerticalOperations.h
@@ -13,7 +13,7 @@
 #include <limits>
 #include <type_traits>
 
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 
 ////
 // By vertical operations we mean those applied across vector elements or
diff --git a/src/TNL/Containers/List.h b/src/TNL/Containers/List.h
deleted file mode 100644
index 0cf6f762dbfce6057af4132659064fc889c91082..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/List.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/***************************************************************************
-                          List.h  -  description
-                             -------------------
-    begin                : Sat, 10 Apr 2004 15:58:51 +0100
-    copyright            : (C) 2004 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <iostream>
-
-#include <TNL/Assert.h>
-#include <TNL/File.h>
-#include <TNL/String.h>
-#include <TNL/param-types.h>
-
-namespace TNL {
-namespace Containers {
-
-template< class T > class ListDataElement;
-
-/// \brief Template for double linked lists
-/*! To acces elements in the list one can use method getSize() and
-    operator[](). To add elements there are methods Append(),
-    Prepend() and Insert() to insert an element at given
-    position. To erase particular element there is method
-    Erase() taking the element position. To erase all elements
-    there is method reset(). There are also alternatives DeepErase()
-    and DeepEraseAll() to free dynamicaly allocated data inside the
-    data elements.
-    The list stores pointer to last accesed element so if one goes
-    seqeuntialy through the list there is no inefficiency. The
-    accesing algorithm is also able to deside whether to start from
-    the last accesed position or from the begining resp. from the end
-    of the list. So with common use one does not need to worry about
-    efficiency :-)
- */
-template< class T > class List
-{
-   public:
-      typedef T ValueType;
-
-      /// \brief Basic constructor.
-      ///
-      /// Constructs an empty list.
-      List();
-
-      /// \brief Copy constructor.
-      ///
-      /// Construct a copy of \e list.
-      /// \param list Name of another list.
-      List( const List& list );
-
-      /// \brief Destructor.
-      ///
-      /// Destroys the list. References to the values in the list become invalid.
-      ~List();
-
-      /// Returns the type of list.
-      static String getType();
-
-      /// Returns \e true if the list contains no items, otherwise returns \e false.
-      bool isEmpty() const;
-
-      /// Returns number of items in the list.
-      int getSize() const;
-
-      /// Indexing operator.
-      T& operator[] ( const int& ind );
-
-      /// Indexing operator for constant instances.
-      const T& operator[] ( const int& ind ) const;
-
-      const List& operator = ( const List& lst );
-
-      bool operator == ( const List& lst ) const;
-
-      bool operator != ( const List& lst ) const;
-
-      /// \brief Appends new data element.
-      ///
-      /// Inserts \e data at the end of the list.
-      bool Append( const T& data );
-
-      /// \brief Prepends new data element.
-      ///
-      /// Inserts \e data at the beginning of the list.
-      bool Prepend( const T& data );
-
-      /// \brief Inserts new data element at given position.
-      ///
-      /// Inserts \e data at index position \e ind in the list.
-      bool Insert( const T& data, const int& ind );
-
-      /// Appends copy of another list.
-      ///
-      /// \param lst Name of another list.
-      bool AppendList( const List< T >& lst );
-
-      /// Prepends copy of another list.
-      ///
-      /// \param lst Name of another list.
-      bool PrependList( const List< T >& lst );
-
-      /// Transforms list to an \e array.
-      template< typename Array >
-      void toArray( Array& array );
-
-      /***
-       * \brief Checks if there is an element with value \e v in given array.
-       *
-       * \param v Reference to a value.
-       */
-      bool containsValue( const T& v ) const;
-
-      /// Erases data element at given position.
-      ///
-      /// \param ind Index of the data element one chooses to remove.
-      void Erase( const int& ind );
-
-      /// Erases data element with contained data at given position.
-      ///
-      /// \param ind Index of the data element one chooses to remove.
-      void DeepErase( const int& ind );
-
-      /// Erases all data elements.
-      void reset();
-
-      /// \brief Erases all data elements with contained data.
-      ///
-      /// Frees dynamicaly allocated data inside the data elements
-      void DeepEraseAll();
-
-      /// Saves the list in binary format.
-      ///
-      /// \param file Name of file.
-      bool Save( File& file ) const;
-
-      /// Saves the list in binary format using method save of type T.
-      ///
-      /// \param file Name of file.
-      bool DeepSave( File& file ) const;
-
-      /// Loads the list from file.
-      ///
-      /// \param file Name of file.
-      bool Load( File& file );
-
-      /// Loads the list from file using method Load of the type T.
-      ///
-      /// \param file Name of file.
-      bool DeepLoad( File& file );
-
-   protected:
-      /// Pointer to the first element.
-      ListDataElement< T >* first;
-
-      /// Pointer to the last element.
-      /*! We use pointer to last element while adding new element to keep order of elements
-       */
-      ListDataElement< T >* last;
-
-      /// List size.
-      int size;
-
-      /// Iterator.
-      mutable ListDataElement< T >* iterator;
-
-      /// Iterator index.
-      mutable int index;
-};
-
-template< typename T > std::ostream& operator << ( std::ostream& str, const List< T >& list );
-
-//! Data element for List and mStack
-template< class T > class ListDataElement
-{
-   //! Main data
-   T data;
-
-   //! Pointer to the next element
-   ListDataElement< T >* next;
-
-   //! Pointer to the previous element
-   ListDataElement< T >* previous;
-
-   public:
-   //! Basic constructor
-   ListDataElement()
-      : next( 0 ),
-        previous( 0 ){};
-
-   //! Constructor with given data and possibly pointer to next element
-   ListDataElement( const T& dt,
-                    ListDataElement< T >* prv = 0,
-                    ListDataElement< T >* nxt = 0 )
-      : data( dt ),
-        next( nxt ),
-        previous( prv ){};
-
-   //! Destructor
-   ~ListDataElement(){};
-
-   //! Return data for non-const instances
-   T& Data() { return data; };
-
-   //! Return data for const instances
-   const T& Data() const { return data; };
-
-   //! Return pointer to the next element for non-const instances
-   ListDataElement< T >*& Next() { return next; };
-
-   //! Return pointer to the next element for const instances
-   const ListDataElement< T >* Next() const { return next; };
-
-   //! Return pointer to the previous element for non-const instances
-   ListDataElement< T >*& Previous() { return previous; };
-
-   //! Return pointer to the previous element for const instances
-   const ListDataElement< T >* Previous() const { return previous; };
-};
-
-} // namespace Containers
-} // namespace TNL
-
-#include <TNL/Containers/List_impl.h>
diff --git a/src/TNL/Containers/List_impl.h b/src/TNL/Containers/List_impl.h
deleted file mode 100644
index a8bcb81158ad187b3a3573e2a4c34c758f64640f..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/List_impl.h
+++ /dev/null
@@ -1,352 +0,0 @@
-/***************************************************************************
-                          List_impl.h  -  description
-                             -------------------
-    begin                : Mar, 5 Apr 2016 12:46 PM
-    copyright            : (C) 2016 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Containers/List.h>
-#include <TNL/Math.h>
-
-namespace TNL {
-namespace Containers {
-
-template< typename T >
-List< T >::List()
-   : first( 0 ),  last( 0 ), size( 0 ), iterator( 0 ), index( 0 )
-{
-}
-
-template< typename T >
-List< T >::List( const List& list )
-   : first( 0 ), last( 0 ), size( 0 ), iterator( 0 ), index( 0 )
-{
-   AppendList( list );
-}
-
-template< typename T >
-List< T >::~List()
-{
-   reset();
-}
-
-template< typename T >
-String List< T >::getType()
-{
-   return String( "Containers::List< " ) + TNL::getType< T >() +  String( " >" );
-}
-
-template< typename T >
-bool List< T >::isEmpty() const
-{
-   return ! size;
-}
- 
-template< typename T >
-int List< T >::getSize() const
-{
-   return size;
-}
-
-template< typename T >
-T& List< T >::operator[]( const int& ind )
-{
-   TNL_ASSERT( ind < size, );
-   int iter_dist = TNL::abs( index - ind );
-   if( ! iterator ||
-       iter_dist > ind ||
-       iter_dist > size - ind )
-   {
-      if( ind < size - ind )
-      {
-         //cout << "Setting curent index to 0." << std::endl;
-         index = 0;
-         iterator = first;
-      }
-      else
-      {
-         //cout << "Setting curent index to size - 1." << std::endl;
-         index = size - 1;
-         iterator = last;
-      }
-   }
-   while( index != ind )
-   {
-      //cout << " current index = " << index
-      //     << " index = " << ind << std::endl;
-      if( ind < index )
-      {
-         iterator = iterator -> Previous();
-         index --;
-      }
-      else
-      {
-         iterator = iterator -> Next();
-         index ++;
-      }
-      TNL_ASSERT( iterator, );
-   }
-   return iterator -> Data();
-};
- 
-template< typename T >
-const T& List< T >::operator[]( const int& ind ) const
-{
-   return const_cast< List< T >* >( this ) -> operator[]( ind );
-}
-
-template< typename T >
-const List< T >& List< T >::operator = ( const List& lst )
-{
-   AppendList( lst );
-   return( *this );
-}
-
-template< typename T >
-bool List< T >::operator == ( const List& lst ) const
-{
-   if( this->getSize() != lst.getSize() )
-      return false;
-   for( int i = 0; i < this->getSize(); i++ )
-      if( (*this)[ i ] != lst[ i ] )
-         return false;
-   return true;
-}
-
-template< typename T >
-bool List< T >::operator != ( const List& lst ) const
-{
-   return ! operator==( lst );
-}
-
-template< typename T >
-bool List< T >::Append( const T& data )
-{
-   if( ! first )
-   {
-      TNL_ASSERT( ! last, );
-      first = last = new ListDataElement< T >( data );
-   }
-   else
-   {
-      ListDataElement< T >* new_element =  new ListDataElement< T >( data, last, 0 );
-      TNL_ASSERT( last, );
-      last = last -> Next() = new_element;
-   }
-   size ++;
-   return true;
-};
-
-template< typename T >
-bool List< T >::Prepend( const T& data )
-{
-   if( ! first )
-   {
-      TNL_ASSERT( ! last, );
-      first = last = new ListDataElement< T >( data );
-   }
-   else
-   {
-      ListDataElement< T >* new_element =  new ListDataElement< T >( data, 0, first );
-      first = first -> Previous() = new_element;
-   }
-   size ++;
-   index ++;
-   return true;
-};
-
-template< typename T >
-bool List< T >::Insert( const T& data, const int& ind )
-{
-   TNL_ASSERT( ind <= size || ! size, );
-   if( ind == 0 ) return Prepend( data );
-   if( ind == size ) return Append( data );
-   operator[]( ind );
-   ListDataElement< T >* new_el =
-      new ListDataElement< T >( data,
-                             iterator -> Previous(),
-                             iterator );
-   iterator -> Previous() -> Next() = new_el;
-   iterator -> Previous() = new_el;
-   iterator = new_el;
-   size ++;
-   return true;
-};
-
-template< typename T >
-bool List< T >::AppendList( const List< T >& lst )
-{
-   int i;
-   for( i = 0; i < lst. getSize(); i ++ )
-   {
-      if( ! Append( lst[ i ] ) ) return false;
-   }
-   return true;
-};
- 
-template< typename T >
-bool List< T >::PrependList( const List< T >& lst )
-
-{
-   int i;
-   for( i = lst. getSize(); i > 0; i -- )
-      if( ! Prepend( lst[ i - 1 ] ) ) return false;
-   return true;
-};
-
-template< typename T >
-   template< typename Array >
-void List< T >::toArray( Array& array )
-{
-   array.setSize( this->getSize() );
-   for( int i = 0; i < this->getSize(); i++ )
-      array[ i ] = ( *this )[ i ];
-}
-template< typename T >
-bool List< T >::containsValue( const T& v ) const
-{
-   for( int i = 0; i < this->getSize(); i++ )
-      if( ( *this )[ i ] == v )
-         return true;
-   return false;
-}
-
-template< typename T >
-void List< T >::Erase( const int& ind )
-{
-   operator[]( ind );
-   ListDataElement< T >* tmp_it = iterator;
-   if( iterator -> Next() )
-      iterator -> Next() -> Previous() = iterator -> Previous();
-   if( iterator -> Previous() )
-     iterator -> Previous() -> Next() = iterator -> Next();
-   if( iterator -> Next() ) iterator = iterator -> Next();
-   else
-   {
-      iterator = iterator -> Previous();
-      index --;
-   }
-   if( first == tmp_it ) first = iterator;
-   if( last == tmp_it ) last = iterator;
-   delete tmp_it;
-   size --;
-};
-
-template< typename T >
-void List< T >::DeepErase( const int& ind )
-{
-   operator[]( ind );
-   delete iterator -> Data();
-   Erase( ind );
-};
-
-template< typename T >
-void List< T >::reset()
-{
-   iterator = first;
-   ListDataElement< T >* tmp_it;
-   while( iterator )
-   {
-      TNL_ASSERT( iterator, );
-      tmp_it = iterator;
-      iterator = iterator -> Next();
-      delete tmp_it;
-   }
-   first = last = 0;
-   size = 0;
-};
-
-template< typename T >
-void List< T >::DeepEraseAll()
-{
-   iterator = first;
-   ListDataElement< T >* tmp_it;
-   int i( 0 );
-   while( iterator )
-   {
-      tmp_it = iterator;
-      iterator = iterator -> Next();
-      delete tmp_it -> Data();
-      delete tmp_it;
-      i++;
-   }
-   first = last = 0;
-   size = 0;
-};
- 
-template< typename T >
-bool List< T >::Save( File& file ) const
-{
-   file.save( &size );
-   for( int i = 0; i < size; i ++ )
-      if( ! file. save( &operator[]( i ), 1 ) )
-         return false;
-   return true;
-}
-
-template< typename T >
-bool List< T >::DeepSave( File& file ) const
-{
-   file.save( &size );
-   for( int i = 0; i < size; i ++ )
-      if( ! operator[]( i ). save( file ) ) return false;
-   return true;
-}
-
-template< typename T >
-bool List< T >::Load( File& file )
-{
-   reset();
-   int _size;
-   file.load( &_size, 1 );
-   if( _size < 0 )
-   {
-      std::cerr << "The curve size is negative." << std::endl;
-      return false;
-   }
-   T t;
-   for( int i = 0; i < _size; i ++ )
-   {
-      if( ! file.load( &t, 1 ) )
-         return false;
-      Append( t );
-   }
-   return true;
-};
-
-template< typename T >
-bool List< T >::DeepLoad( File& file )
-{
-   reset();
-   int _size;
-   file.load( &_size );
-   if( _size < 0 )
-   {
-      std::cerr << "The list size is negative." << std::endl;
-      return false;
-   }
-   for( int i = 0; i < _size; i ++ )
-   {
-      T t;
-      if( ! t. load( file ) ) return false;
-      Append( t );
-   }
-   return true;
-};
- 
-template< typename T >
-std::ostream& operator << ( std::ostream& str, const List< T >& list )
-{
-   int i, size( list. getSize() );
-   for( i = 0; i < size; i ++ )
-      str << "Item " << i << ":" << list[ i ] << std::endl;
-   return str;
-};
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h
index 76e61846afebe3e5dab4706556550e95db667db3..5e575cc21ce8292ba3f9d3d4c8ed4b189b056936 100644
--- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h
+++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h
@@ -52,10 +52,6 @@ class EllpackIndexMultimap
       template< typename Device_ >
       EllpackIndexMultimap& operator=( const EllpackIndexMultimap< Index, Device_, LocalIndex, SliceSize >& other );
 
-      static String getType();
-
-      String getTypeVirtual() const;
-
       void setKeysRange( const IndexType& keysRange );
 
       __cuda_callable__
diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp
index 8aaba006ac3ce7e9a71e333a8185031ff8de8c82..6fb1f4b26d5a4c3b8447e1156c89641118be3c32 100644
--- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp
+++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp
@@ -43,34 +43,6 @@ operator=( const EllpackIndexMultimap< Index, Device_, LocalIndex, SliceSize >&
    return *this;
 }
 
-template< typename Index,
-          typename Device,
-          typename LocalIndex,
-          int SliceSize >
-String
-EllpackIndexMultimap< Index, Device, LocalIndex, SliceSize >::
-getType()
-{
-   return String( "EllpackIndexMultimap< ") +
-          String( TNL::getType< Index >() ) +
-          String( ", " ) +
-          Device :: getDeviceType() +
-          String( ", " ) +
-          String( TNL::getType< LocalIndexType >() ) +
-          String( " >" );
-}
-
-template< typename Index,
-          typename Device,
-          typename LocalIndex,
-          int SliceSize >
-String
-EllpackIndexMultimap< Index, Device, LocalIndex, SliceSize >::
-getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Index,
           typename Device,
           typename LocalIndex,
diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h b/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h
index fe7a0fb380230909be094042f69cf3ddabd24522..9be47980d1dbef78af8891ff50837d70fb851c22 100644
--- a/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h
+++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h
@@ -13,7 +13,7 @@
 #include <type_traits>
 #include <ostream>
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
 namespace Containers {
diff --git a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h
index 2acd3c5d480aeb479fed6c2ab781e1d3c9cf68d2..9533393059255bc7151e803e18cec2f1829ea4b7 100644
--- a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h
+++ b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Containers {
@@ -48,11 +48,11 @@ void permuteMultimapKeys( Multimap& multimap, const PermutationVector& perm )
    Pointers::DevicePointer< Multimap > multimapPointer( multimap );
    Pointers::DevicePointer< Multimap > multimapCopyPointer( multimapCopy );
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(),
-                                    kernel,
-                                    &multimapPointer.template getData< DeviceType >(),
-                                    &multimapCopyPointer.template modifyData< DeviceType >(),
-                                    perm.getData() );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(),
+                                                kernel,
+                                                &multimapPointer.template getData< DeviceType >(),
+                                                &multimapCopyPointer.template modifyData< DeviceType >(),
+                                                perm.getData() );
 
    // copy the permuted data back into the multimap
    multimap = multimapCopy;
@@ -79,10 +79,10 @@ void permuteMultimapValues( Multimap& multimap, const PermutationVector& iperm )
    };
 
    Pointers::DevicePointer< Multimap > multimapPointer( multimap );
-   ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(),
-                                    kernel,
-                                    &multimapPointer.template modifyData< DeviceType >(),
-                                    iperm.getData() );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(),
+                                                kernel,
+                                                &multimapPointer.template modifyData< DeviceType >(),
+                                                iperm.getData() );
 }
 
 } // namespace Multimaps
diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h
index ba9994da9bb933fa7da825b550cca73bdc2e7498..f816cabd2c37626b978a03dbdf6a6ed63076036d 100644
--- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h
+++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h
@@ -52,10 +52,6 @@ class StaticEllpackIndexMultimap
       template< typename Device_ >
       StaticEllpackIndexMultimap& operator=( const StaticEllpackIndexMultimap< ValuesCount, Index, Device_, LocalIndex, SliceSize >& other );
 
-      static String getType();
-
-      String getTypeVirtual() const;
-
       void setKeysRange( const IndexType& keysRange );
 
       __cuda_callable__
diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp
index c8dcd637eeb05fe3d763606a40c16c2e7dd6833e..3e03f9e03a5c22cad10e242b93dff20151a37598 100644
--- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp
+++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp
@@ -43,36 +43,6 @@ operator=( const StaticEllpackIndexMultimap< ValuesCount, Index, Device_, LocalI
    return *this;
 }
 
-template< int ValuesCount,
-          typename Index,
-          typename Device,
-          typename LocalIndex,
-          int SliceSize >
-String
-StaticEllpackIndexMultimap< ValuesCount, Index, Device, LocalIndex, SliceSize >::
-getType()
-{
-   return String( "StaticEllpackIndexMultimap< ") +
-          String( TNL::getType< Index >() ) +
-          String( ", " ) +
-          Device :: getDeviceType() +
-          String( ", " ) +
-          String( TNL::getType< LocalIndexType >() ) +
-          String( " >" );
-}
-
-template< int ValuesCount,
-          typename Index,
-          typename Device,
-          typename LocalIndex,
-          int SliceSize >
-String
-StaticEllpackIndexMultimap< ValuesCount, Index, Device, LocalIndex, SliceSize >::
-getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< int ValuesCount,
           typename Index,
           typename Device,
diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h
index 95ffade9fcad3674a7092bc69701e2a2500ab819..efae4f05173b9f0531cc12e96dd5644a5c72fefe 100644
--- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h
+++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h
@@ -13,7 +13,7 @@
 #include <type_traits>
 #include <ostream>
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
 namespace Containers {
diff --git a/src/TNL/Containers/NDArray.h b/src/TNL/Containers/NDArray.h
index 8472f4d7151b9896a20c3f20af5d302286969022..3cbc8a7bc1c484cecbb847db248525a42b756ae5 100644
--- a/src/TNL/Containers/NDArray.h
+++ b/src/TNL/Containers/NDArray.h
@@ -352,13 +352,13 @@ class StaticNDArray
                          SizesHolder,
                          Permutation,
                          __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > >,
-                         void >
+                         Devices::Sequential >
 {
    using Base = NDArrayStorage< StaticArray< __ndarray_impl::StaticStorageSizeGetter< SizesHolder >::get(), Value >,
                          SizesHolder,
                          Permutation,
                          __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > >,
-                         void >;
+                         Devices::Sequential >;
    static_assert( __ndarray_impl::StaticStorageSizeGetter< SizesHolder >::get() > 0,
                   "All dimensions of a static array must to be positive." );
 
diff --git a/src/TNL/Containers/NDArrayView.h b/src/TNL/Containers/NDArrayView.h
index 3e37de372521cddb69db001ec05f6b238d644e15..d5d94d61eeee144491cd2d359a0bea16825e9b55 100644
--- a/src/TNL/Containers/NDArrayView.h
+++ b/src/TNL/Containers/NDArrayView.h
@@ -18,7 +18,8 @@
 #include <TNL/Containers/ndarray/Executors.h>
 #include <TNL/Containers/ndarray/BoundaryExecutors.h>
 #include <TNL/Containers/ndarray/Operations.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Algorithms/MemoryOperations.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 
 namespace TNL {
 namespace Containers {
@@ -75,7 +76,7 @@ public:
    {
       TNL_ASSERT_EQ( getSizes(), other.getSizes(), "The sizes of the array views must be equal, views are not resizable." );
       if( getStorageSize() > 0 )
-         Algorithms::ArrayOperations< DeviceType >::copy( array, other.array, getStorageSize() );
+         Algorithms::MemoryOperations< DeviceType >::copy( array, other.array, getStorageSize() );
       return *this;
    }
 
@@ -93,7 +94,7 @@ public:
                        "The sizes of the array views must be equal, views are not resizable." );
       if( getStorageSize() > 0 ) {
          TNL_ASSERT_TRUE( array, "Attempted to assign to an empty view." );
-         Algorithms::ArrayOperations< DeviceType, typename OtherView::DeviceType >::copy( array, other.getData(), getStorageSize() );
+         Algorithms::MultiDeviceMemoryOperations< DeviceType, typename OtherView::DeviceType >::copy( array, other.getData(), getStorageSize() );
       }
       return *this;
    }
@@ -138,7 +139,7 @@ public:
       if( getSizes() != other.getSizes() )
          return false;
       // FIXME: uninitialized data due to alignment in NDArray and padding in SlicedNDArray
-      return Algorithms::ArrayOperations< Device, Device >::compare( array, other.array, getStorageSize() );
+      return Algorithms::MemoryOperations< Device >::compare( array, other.array, getStorageSize() );
    }
 
    TNL_NVCC_HD_WARNING_DISABLE
@@ -148,7 +149,7 @@ public:
       if( getSizes() != other.getSizes() )
          return true;
       // FIXME: uninitialized data due to alignment in NDArray and padding in SlicedNDArray
-      return ! Algorithms::ArrayOperations< Device, Device >::compare( array, other.array, getStorageSize() );
+      return ! Algorithms::MemoryOperations< Device >::compare( array, other.array, getStorageSize() );
    }
 
    __cuda_callable__
diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h
index 2421305a7df26e1949a70d905092ddf6ab26edaa..51ee055066fab43d3eaca7a53e5a1bc1bee2abb0 100644
--- a/src/TNL/Containers/StaticArray.h
+++ b/src/TNL/Containers/StaticArray.h
@@ -84,6 +84,7 @@ public:
     * 
     * @param elems input initializer list
     */
+   __cuda_callable__
    StaticArray( const std::initializer_list< Value > &elems );
 
    /**
@@ -105,10 +106,6 @@ public:
    __cuda_callable__
    StaticArray( const Value& v1, const Value& v2, const Value& v3 );
 
-   /**
-    * \brief Gets type of this array.
-    */
-   static String getType();
 
    /**
     * \brief Gets pointer to data of this static array.
diff --git a/src/TNL/Containers/StaticArray.hpp b/src/TNL/Containers/StaticArray.hpp
index 89a66ecc91af572edd3307be9993e2dc330dddc5..c1ac8e62a7a2e8257d0824c36d7f4caaa443e661 100644
--- a/src/TNL/Containers/StaticArray.hpp
+++ b/src/TNL/Containers/StaticArray.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          StaticArray_impl.h  -  description
+                          StaticArray.hpp  -  description
                              -------------------
     begin                : Feb 10, 2014
     copyright            : (C) 2014 by Tomas Oberhuber
@@ -10,11 +10,11 @@
 
 #pragma once
 
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 #include <TNL/Math.h>
 #include <TNL/Containers/StaticArray.h>
-#include <TNL/Containers/Algorithms/StaticArrayAssignment.h>
-#include <TNL/StaticFor.h>
+#include <TNL/Containers/detail/StaticArrayAssignment.h>
+#include <TNL/Algorithms/StaticFor.h>
 
 namespace TNL {
 namespace Containers {
@@ -102,24 +102,25 @@ template< int Size, typename Value >
 __cuda_callable__
 StaticArray< Size, Value >::StaticArray( const Value v[ Size ] )
 {
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, v );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, v );
 }
 
 template< int Size, typename Value >
 __cuda_callable__
 StaticArray< Size, Value >::StaticArray( const Value& v )
 {
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignValueFunctor{}, data, v );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignValueFunctor{}, data, v );
 }
 
 template< int Size, typename Value >
 __cuda_callable__
 StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value >& v )
 {
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, v.getData() );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, v.getData() );
 }
 
 template< int Size, typename Value >
+__cuda_callable__
 StaticArray< Size, Value >::StaticArray( const std::initializer_list< Value > &elems)
 {
    auto it = elems.begin();
@@ -146,16 +147,6 @@ StaticArray< Size, Value >::StaticArray( const Value& v1, const Value& v2, const
    data[ 2 ] = v3;
 }
 
-template< int Size, typename Value >
-String StaticArray< Size, Value >::getType()
-{
-   return String( "Containers::StaticArray< " ) +
-          convertToString( Size ) +
-          String( ", " ) +
-          TNL::getType< Value >() +
-          String( " >" );
-}
-
 template< int Size, typename Value >
 __cuda_callable__
 Value* StaticArray< Size, Value >::getData()
@@ -237,7 +228,7 @@ template< int Size, typename Value >
 __cuda_callable__
 StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const StaticArray< Size, Value >& array )
 {
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, array.getData() );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, array.getData() );
    return *this;
 }
 
@@ -246,7 +237,7 @@ template< int Size, typename Value >
 __cuda_callable__
 StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const T& v )
 {
-   Algorithms::StaticArrayAssignment< StaticArray, T >::assign( *this, v );
+   detail::StaticArrayAssignment< StaticArray, T >::assign( *this, v );
    return *this;
 }
 
@@ -273,7 +264,7 @@ StaticArray< Size, Value >::
 operator StaticArray< Size, OtherValue >() const
 {
    StaticArray< Size, OtherValue > aux;
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, aux.getData(), data );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, aux.getData(), data );
    return aux;
 }
 
@@ -281,20 +272,20 @@ template< int Size, typename Value >
 __cuda_callable__
 void StaticArray< Size, Value >::setValue( const ValueType& val )
 {
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignValueFunctor{}, data, val );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignValueFunctor{}, data, val );
 }
 
 template< int Size, typename Value >
 bool StaticArray< Size, Value >::save( File& file ) const
 {
-   file.save< Value, Value, Devices::Host >( data, Size );
+   file.save( data, Size );
    return true;
 }
 
 template< int Size, typename Value >
 bool StaticArray< Size, Value >::load( File& file)
 {
-   file.load< Value, Value, Devices::Host >( data, Size );
+   file.load( data, Size );
    return true;
 }
 
diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h
index a15420d07bb9edf1940329eb4597c7f4e81726de..2fe136ac951d4c02bb339e4c0edcee43f3c3b7d8 100644
--- a/src/TNL/Containers/StaticVector.h
+++ b/src/TNL/Containers/StaticVector.h
@@ -53,11 +53,13 @@ public:
    /**
     * \brief Default copy-assignment operator.
     */
+   __cuda_callable__
    StaticVector& operator=( const StaticVector& ) = default;
 
    /**
     * \brief Default move-assignment operator.
     */
+   __cuda_callable__
    StaticVector& operator=( StaticVector&& ) = default;
 
    //! Constructors and assignment operators are inherited from the class \ref StaticArray.
@@ -94,11 +96,6 @@ public:
    bool setup( const Config::ParameterContainer& parameters,
                const String& prefix = "" );
 
-   /**
-    * \brief Gets type of this vector.
-    */
-   static String getType();
-
    /**
     * \brief Assignment operator with a vector expression.
     * 
@@ -108,6 +105,7 @@ public:
     * \return reference to this vector
     */
    template< typename VectorExpression >
+   __cuda_callable__
    StaticVector& operator=( const VectorExpression& expression );
 
    /**
@@ -185,6 +183,7 @@ namespace TNL {
 namespace Containers {
 
 template< typename Real >
+__cuda_callable__
 StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u,
                                        const StaticVector< 3, Real >& v )
 {
@@ -196,6 +195,7 @@ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u,
 }
 
 template< typename Real >
+__cuda_callable__
 Real TriangleArea( const StaticVector< 2, Real >& a,
                    const StaticVector< 2, Real >& b,
                    const StaticVector< 2, Real >& c )
@@ -213,6 +213,7 @@ Real TriangleArea( const StaticVector< 2, Real >& a,
 }
 
 template< typename Real >
+__cuda_callable__
 Real TriangleArea( const StaticVector< 3, Real >& a,
                    const StaticVector< 3, Real >& b,
                    const StaticVector< 3, Real >& c )
diff --git a/src/TNL/Containers/StaticVector.hpp b/src/TNL/Containers/StaticVector.hpp
index 8442db66124c20f32d1409398e716c012ca2fe1b..dc97eeea99b5d551e11beb7543e5f0822f79fad4 100644
--- a/src/TNL/Containers/StaticVector.hpp
+++ b/src/TNL/Containers/StaticVector.hpp
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Containers/StaticVector.h>
-#include <TNL/Containers/Algorithms/VectorAssignment.h>
+#include <TNL/Containers/detail/VectorAssignment.h>
 
 namespace TNL {
 namespace Containers {
@@ -20,9 +20,10 @@ template< int Size, typename Real >
    template< typename T1,
              typename T2,
              template< typename, typename > class Operation >
+__cuda_callable__
 StaticVector< Size, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expr )
 {
-   Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr );
+   detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr );
 }
 
 template< int Size,
@@ -32,7 +33,7 @@ template< int Size,
 __cuda_callable__
 StaticVector< Size, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& expr )
 {
-   Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, expr );
+   detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, expr );
 }
 
 template< int Size, typename Real >
@@ -50,22 +51,13 @@ StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters,
    return true;
 }
 
-template< int Size, typename Real >
-String StaticVector< Size, Real >::getType()
-{
-   return String( "Containers::StaticVector< " ) +
-          convertToString( Size ) +
-          String( ", " ) +
-          TNL::getType< Real >() +
-          String( " >" );
-}
-
 template< int Size, typename Real >
    template< typename VectorExpression >
+__cuda_callable__
 StaticVector< Size, Real >&
 StaticVector< Size, Real >::operator=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignment< StaticVector< Size, Real >, VectorExpression >::assignStatic( *this, expression );
+   detail::VectorAssignment< StaticVector< Size, Real >, VectorExpression >::assignStatic( *this, expression );
    return *this;
 }
 
@@ -74,7 +66,7 @@ template< int Size, typename Real >
 __cuda_callable__
 StaticVector< Size, Real >& StaticVector< Size, Real >::operator+=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::additionStatic( *this, expression );
+   detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::additionStatic( *this, expression );
    return *this;
 }
 
@@ -83,7 +75,7 @@ template< int Size, typename Real >
 __cuda_callable__
 StaticVector< Size, Real >& StaticVector< Size, Real >::operator-=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::subtractionStatic( *this, expression );
+   detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::subtractionStatic( *this, expression );
    return *this;
 }
 
@@ -92,7 +84,7 @@ template< int Size, typename Real >
 __cuda_callable__
 StaticVector< Size, Real >& StaticVector< Size, Real >::operator*=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::multiplicationStatic( *this, expression );
+   detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::multiplicationStatic( *this, expression );
    return *this;
 }
 
@@ -101,7 +93,7 @@ template< int Size, typename Real >
 __cuda_callable__
 StaticVector< Size, Real >& StaticVector< Size, Real >::operator/=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::divisionStatic( *this, expression );
+   detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::divisionStatic( *this, expression );
    return *this;
 }
 
@@ -112,7 +104,7 @@ StaticVector< Size, Real >::
 operator StaticVector< Size, OtherReal >() const
 {
    StaticVector< Size, OtherReal > aux;
-   StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, aux.getData(), this->getData() );
+   Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, aux.getData(), this->getData() );
    return aux;
 }
 
diff --git a/src/TNL/Containers/Subrange.h b/src/TNL/Containers/Subrange.h
index 08911855337817712c960047bf9688a7f134a752..17e02c45f96ff5be79bde0caf8692d25db75166e 100644
--- a/src/TNL/Containers/Subrange.h
+++ b/src/TNL/Containers/Subrange.h
@@ -16,7 +16,7 @@
 
 #include <TNL/Assert.h>
 #include <TNL/String.h>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 
 namespace TNL {
 namespace Containers {
@@ -54,11 +54,6 @@ public:
       end = 0;
    }
 
-   static String getType()
-   {
-      return "Subrange< " + TNL::getType< Index >() + " >";
-   }
-
    // Checks if a global index is in the set of local indices.
    __cuda_callable__
    bool isLocal( Index i ) const
@@ -127,7 +122,7 @@ protected:
 template< typename Index >
 std::ostream& operator<<( std::ostream& str, const Subrange< Index >& range )
 {
-   return str << Subrange< Index >::getType() << "( " << range.getBegin() << ", " << range.getEnd() << " )";
+   return str << getType< Subrange< Index > >() << "( " << range.getBegin() << ", " << range.getEnd() << " )";
 }
 
 } // namespace Containers
diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h
index 42f8465978f7539bee9b2d7de731a6587a74b95f..be08266b61bc42555f9b78cd5471bce7f31f5b43 100644
--- a/src/TNL/Containers/Vector.h
+++ b/src/TNL/Containers/Vector.h
@@ -42,7 +42,6 @@ class Vector
 : public Array< Real, Device, Index, Allocator >
 {
 public:
-
    /**
     * \brief Type of elements stored in this vector.
     */
@@ -67,16 +66,6 @@ public:
     */
    using AllocatorType = Allocator;
 
-   /**
-    * \brief Defines the same vector type but allocated on host (CPU).
-    */
-   using HostType = Vector< Real, TNL::Devices::Host, Index >;
-
-   /**
-    * \brief Defines the same vector type but allocated on CUDA device (GPU).
-    */
-   using CudaType = Vector< Real, TNL::Devices::Cuda, Index >;
-
    /**
     * \brief Compatible VectorView type.
     */
@@ -87,6 +76,16 @@ public:
     */
    using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >;
 
+   /**
+    * \brief A template which allows one to quickly obtain a \ref Vector type with changed template parameters.
+    */
+   template< typename _Real,
+             typename _Device = Device,
+             typename _Index = Index,
+             typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Real > >
+   using Self = Vector< _Real, _Device, _Index, _Allocator >;
+
+
    // constructors and assignment operators inherited from the class Array
    using Array< Real, Device, Index, Allocator >::Array;
    using Array< Real, Device, Index, Allocator >::operator=;
@@ -121,16 +120,6 @@ public:
     */
    Vector& operator=( Vector&& ) = default;
 
-   /**
-    * \brief Returns a \ref String representation of the vector type in C++ style.
-    */
-   static String getType();
-
-   /**
-    * \brief Returns a \ref String representation of the vector type in C++ style.
-    */
-   virtual String getTypeVirtual() const;
-
    /**
     * \brief Returns a modifiable view of the vector.
     *
@@ -255,75 +244,84 @@ public:
    Vector& operator/=( const VectorExpression& expression );
 
    /**
-    * \brief Computes prefix sum of the vector elements.
+    * \brief Computes the scan (prefix sum) of the vector elements.
     *
-    * Computes prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector remain unchanged.
+    * By default, scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * 
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive >
-   void prefixSum( IndexType begin = 0, IndexType end = 0 );
+   void scan( IndexType begin = 0, IndexType end = 0 );
 
    /**
-    * \brief Computes segmented prefix sum of the vector elements.
+    * \brief Computes the segmented scan (prefix sum) of the vector elements.
     *
-    * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector remain unchanged. Whole vector is assumed
-    * by default, i.e. when \e begin and \e end are set to zero.
+    * By default, segmented scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * \tparam FlagsArray is an array type describing beginnings of the segments.
-    * 
-    * \param flags is an array having `1` at the beginning of each segment and `0` on any other position
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param flags A binary array where ones indicate the beginning of each
+    *              segment.
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive,
              typename FlagsArray >
-   void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
+   void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
 
    /**
-    * \brief Computes prefix sum of the vector expression.
+    * \brief Computes the scan (prefix sum) of the vector expression.
     *
-    * Computes prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector remain unchanged. Whole vector expression is assumed
-    * by default, i.e. when \e begin and \e end are set to zero.
+    * By default, scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * \tparam VectorExpression is the vector expression.
-    * 
-    * \param expression is the vector expression.
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param expression A vector expression for which scan is computed and
+    *                   stored in this vector.
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive,
              typename VectorExpression >
-   void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 );
+   void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 );
 
    /**
-    * \brief Computes segmented prefix sum of a vector expression.
+    * \brief Computes the segmented scan (prefix sum) of a vector expression.
     *
-    * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector remain unchanged. Whole vector expression is assumed
-    * by default, i.e. when \e begin and \e end are set to zero.
+    * By default, segmented scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * \tparam VectorExpression is the vector expression.
-    * \tparam FlagsArray is an array type describing beginnings of the segments.
-    * 
-    * \param expression is the vector expression.
-    * \param flags is an array having `1` at the beginning of each segment and `0` on any other position
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param expression A vector expression for which scan is computed and
+    *                   stored in this vector.
+    * \param flags A binary array where ones indicate the beginning of each
+    *              segment.
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive,
              typename VectorExpression,
              typename FlagsArray >
-   void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
+   void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
 };
 
 } // namespace Containers
diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp
index 0468fc749135434ff5542dcf4ac60f239378eb41..5fdce0d09d2adb53b7c19e971fdf3b0a545891a5 100644
--- a/src/TNL/Containers/Vector.hpp
+++ b/src/TNL/Containers/Vector.hpp
@@ -27,31 +27,6 @@ Vector( const Vector& vector,
 {
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename Allocator >
-String
-Vector< Real, Device, Index, Allocator >::
-getType()
-{
-   return String( "Containers::Vector< " ) +
-                  TNL::getType< Real >() + ", " +
-                  Device::getDeviceType() + ", " +
-                  TNL::getType< Index >() + " >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename Allocator >
-String
-Vector< Real, Device, Index, Allocator >::
-getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -107,8 +82,8 @@ Vector< Real, Device, Index, Allocator >&
 Vector< Real, Device, Index, Allocator >::
 operator=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignment< Vector, VectorExpression >::resize( *this, expression );
-   Algorithms::VectorAssignment< Vector, VectorExpression >::assign( *this, expression );
+   detail::VectorAssignment< Vector, VectorExpression >::resize( *this, expression );
+   detail::VectorAssignment< Vector, VectorExpression >::assign( *this, expression );
    return *this;
 }
 
@@ -121,7 +96,7 @@ Vector< Real, Device, Index, Allocator >&
 Vector< Real, Device, Index, Allocator >::
 operator+=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::addition( *this, expression );
+   detail::VectorAssignmentWithOperation< Vector, VectorExpression >::addition( *this, expression );
    return *this;
 }
 
@@ -134,7 +109,7 @@ Vector< Real, Device, Index, Allocator >&
 Vector< Real, Device, Index, Allocator >::
 operator-=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::subtraction( *this, expression );
+   detail::VectorAssignmentWithOperation< Vector, VectorExpression >::subtraction( *this, expression );
    return *this;
 }
 
@@ -147,7 +122,7 @@ Vector< Real, Device, Index, Allocator >&
 Vector< Real, Device, Index, Allocator >::
 operator*=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::multiplication( *this, expression );
+   detail::VectorAssignmentWithOperation< Vector, VectorExpression >::multiplication( *this, expression );
    return *this;
 }
 
@@ -160,7 +135,7 @@ Vector< Real, Device, Index, Allocator >&
 Vector< Real, Device, Index, Allocator >::
 operator/=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::division( *this, expression );
+   detail::VectorAssignmentWithOperation< Vector, VectorExpression >::division( *this, expression );
    return *this;
 }
 
@@ -171,7 +146,7 @@ template< typename Real,
    template< Algorithms::ScanType Type >
 void
 Vector< Real, Device, Index, Allocator >::
-prefixSum( IndexType begin, IndexType end )
+scan( IndexType begin, IndexType end )
 {
    if( end == 0 )
       end = this->getSize();
@@ -186,7 +161,7 @@ template< typename Real,
              typename FlagsArray >
 void
 Vector< Real, Device, Index, Allocator >::
-segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end )
+segmentedScan( FlagsArray& flags, IndexType begin, IndexType end )
 {
    if( end == 0 )
       end = this->getSize();
@@ -201,9 +176,9 @@ template< typename Real,
              typename VectorExpression >
 void
 Vector< Real, Device, Index, Allocator >::
-prefixSum( const VectorExpression& expression, IndexType begin, IndexType end )
+scan( const VectorExpression& expression, IndexType begin, IndexType end )
 {
-   throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." );
+   throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." );
 }
 
 template< typename Real,
@@ -215,9 +190,9 @@ template< typename Real,
              typename FlagsArray >
 void
 Vector< Real, Device, Index, Allocator >::
-segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end )
+segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end )
 {
-   throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." );
+   throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." );
 }
 
 } // namespace Containers
diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h
index ba43e74d819a2ee58d7ccd7b3bf2e7c54641c571..1a144ea5cde79951ee059f386859322eba18cf57 100644
--- a/src/TNL/Containers/VectorView.h
+++ b/src/TNL/Containers/VectorView.h
@@ -14,7 +14,7 @@
 
 #include <TNL/Containers/ArrayView.h>
 #include <TNL/Containers/Expressions/ExpressionTemplates.h>
-#include <TNL/Containers/Algorithms/Scan.h>
+#include <TNL/Algorithms/Scan.h>
 
 namespace TNL {
 namespace Containers {
@@ -39,7 +39,6 @@ class VectorView
    using BaseType = ArrayView< Real, Device, Index >;
    using NonConstReal = typename std::remove_const< Real >::type;
 public:
-
    /**
     * \brief Type of elements stored in this vector.
     */
@@ -57,16 +56,6 @@ public:
     */
    using IndexType = Index;
 
-   /**
-    * \brief Defines the same vector type but allocated on host (CPU).
-    */
-   using HostType = VectorView< Real, TNL::Devices::Host, Index >;
-
-   /**
-    * \brief Defines the same vector type but allocated on CUDA device (GPU).
-    */
-   using CudaType = VectorView< Real, TNL::Devices::Cuda, Index >;
-
    /**
     * \brief Compatible VectorView type.
     */
@@ -77,6 +66,15 @@ public:
     */
    using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >;
 
+   /**
+    * \brief A template which allows one to quickly obtain a \ref VectorView type with changed template parameters.
+    */
+   template< typename _Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = VectorView< _Real, _Device, _Index >;
+
+
    // constructors and assignment operators inherited from the class ArrayView
    using ArrayView< Real, Device, Index >::ArrayView;
    using ArrayView< Real, Device, Index >::operator=;
@@ -97,11 +95,6 @@ public:
    VectorView( const ArrayView< Real_, Device, Index >& view )
    : BaseType( view ) {}
 
-   /**
-    * \brief Returns a \ref String representation of the vector view type.
-    */
-   static String getType();
-
    /**
     * \brief Returns a modifiable view of the vector view.
     *
@@ -221,75 +214,84 @@ public:
    VectorView& operator/=( const VectorExpression& expression );
 
    /**
-    * \brief Computes prefix sum of the vector view elements.
+    * \brief Computes the scan (prefix sum) of the vector elements.
     *
-    * Computes prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector view remain unchanged.
+    * By default, scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * 
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive >
-   void prefixSum( IndexType begin = 0, IndexType end = 0 );
+   void scan( IndexType begin = 0, IndexType end = 0 );
 
    /**
-    * \brief Computes segmented prefix sum of the vector view elements.
+    * \brief Computes the segmented scan (prefix sum) of the vector elements.
     *
-    * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector view remain unchanged. Whole vector view is assumed
-    * by default, i.e. when \e begin and \e end are set to zero.
+    * By default, segmented scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * \tparam FlagsArray is an array type describing beginnings of the segments.
-    * 
-    * \param flags is an array having `1` at the beginning of each segment and `0` on any other position
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param flags A binary array where ones indicate the beginning of each
+    *              segment.
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive,
              typename FlagsArray >
-   void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
+   void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
 
    /**
-    * \brief Computes prefix sum of the vector expression.
+    * \brief Computes the scan (prefix sum) of the vector expression.
     *
-    * Computes prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector remain unchanged. Whole vector expression is assumed
-    * by default, i.e. when \e begin and \e end are set to zero.
+    * By default, scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * \tparam VectorExpression is the vector expression.
-    * 
-    * \param expression is the vector expression.
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param expression A vector expression for which scan is computed and
+    *                   stored in this vector.
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive,
              typename VectorExpression >
-   void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 );
+   void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 );
 
    /**
-    * \brief Computes segmented prefix sum of a vector expression.
+    * \brief Computes the segmented scan (prefix sum) of a vector expression.
     *
-    * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ).
-    * The other elements of this vector remain unchanged. Whole vector expression is assumed
-    * by default, i.e. when \e begin and \e end are set to zero.
+    * By default, segmented scan is computed for the whole vector. If \e begin
+    * or \e end is set to a non-zero value, only elements in the sub-interval
+    * `[begin, end)` are scanned.
     *
-    * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive.
-    * \tparam VectorExpression is the vector expression.
-    * \tparam FlagsArray is an array type describing beginnings of the segments.
-    * 
-    * \param expression is the vector expression.
-    * \param flags is an array having `1` at the beginning of each segment and `0` on any other position
-    * \param begin beginning of the index range
-    * \param end end of the index range.
+    * \tparam Type The scan type - either \e Inclusive or \e Exclusive.
+    *
+    * \param expression A vector expression for which scan is computed and
+    *                   stored in this vector.
+    * \param flags A binary array where ones indicate the beginning of each
+    *              segment.
+    * \param begin The beginning of the vector sub-interval. It is 0 by
+    *              default.
+    * \param end The end of the vector sub-interval. The default value is 0
+    *            which is, however, replaced with the array size.
     */
    template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive,
              typename VectorExpression,
              typename FlagsArray >
-   void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
+   void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 );
 };
 
 } // namespace Containers
diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp
index 7c342703bcd526307b9cb85c8bde874a913357cc..2c1cd02c8163db83760907a50aeafaf0c8e5404d 100644
--- a/src/TNL/Containers/VectorView.hpp
+++ b/src/TNL/Containers/VectorView.hpp
@@ -11,25 +11,12 @@
 #pragma once
 
 #include <TNL/Containers/VectorView.h>
-#include <TNL/Containers/Algorithms/VectorAssignment.h>
+#include <TNL/Containers/detail/VectorAssignment.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
 namespace Containers {
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String
-VectorView< Real, Device, Index >::
-getType()
-{
-   return String( "Containers::VectorView< " ) +
-                  TNL::getType< Real >() + ", " +
-                  Device::getDeviceType() + ", " +
-                  TNL::getType< Index >() + " >";
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -63,7 +50,7 @@ template< typename Real,
 VectorView< Real, Device, Index >&
 VectorView< Real, Device, Index >::operator=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignment< VectorView, VectorExpression >::assign( *this, expression );
+   detail::VectorAssignment< VectorView, VectorExpression >::assign( *this, expression );
    return *this;
 }
 
@@ -75,7 +62,7 @@ VectorView< Real, Device, Index >&
 VectorView< Real, Device, Index >::
 operator+=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::addition( *this, expression );
+   detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::addition( *this, expression );
    return *this;
 }
 
@@ -87,7 +74,7 @@ VectorView< Real, Device, Index >&
 VectorView< Real, Device, Index >::
 operator-=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::subtraction( *this, expression );
+   detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::subtraction( *this, expression );
    return *this;
 }
 
@@ -99,7 +86,7 @@ VectorView< Real, Device, Index >&
 VectorView< Real, Device, Index >::
 operator*=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::multiplication( *this, expression );
+   detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::multiplication( *this, expression );
    return *this;
 }
 
@@ -111,7 +98,7 @@ VectorView< Real, Device, Index >&
 VectorView< Real, Device, Index >::
 operator/=( const VectorExpression& expression )
 {
-   Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::division( *this, expression );
+   detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::division( *this, expression );
    return *this;
 }
 
@@ -121,7 +108,7 @@ template< typename Real,
    template< Algorithms::ScanType Type >
 void
 VectorView< Real, Device, Index >::
-prefixSum( IndexType begin, IndexType end )
+scan( IndexType begin, IndexType end )
 {
    if( end == 0 )
       end = this->getSize();
@@ -135,7 +122,7 @@ template< typename Real,
              typename FlagsArray >
 void
 VectorView< Real, Device, Index >::
-segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end )
+segmentedScan( FlagsArray& flags, IndexType begin, IndexType end )
 {
    if( end == 0 )
       end = this->getSize();
@@ -149,9 +136,9 @@ template< typename Real,
              typename VectorExpression >
 void
 VectorView< Real, Device, Index >::
-prefixSum( const VectorExpression& expression, IndexType begin, IndexType end )
+scan( const VectorExpression& expression, IndexType begin, IndexType end )
 {
-   throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." );
+   throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." );
 }
 
 template< typename Real,
@@ -162,9 +149,9 @@ template< typename Real,
              typename FlagsArray >
 void
 VectorView< Real, Device, Index >::
-segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end )
+segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end )
 {
-   throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." );
+   throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." );
 }
 
 } // namespace Containers
diff --git a/src/TNL/Containers/Algorithms/ArrayAssignment.h b/src/TNL/Containers/detail/ArrayAssignment.h
similarity index 85%
rename from src/TNL/Containers/Algorithms/ArrayAssignment.h
rename to src/TNL/Containers/detail/ArrayAssignment.h
index 9a67a36b9190d3243332bf985ac978fcb5b7cae9..e6671bb2ced00cc2124d3019b07080ae379d986a 100644
--- a/src/TNL/Containers/Algorithms/ArrayAssignment.h
+++ b/src/TNL/Containers/detail/ArrayAssignment.h
@@ -11,11 +11,12 @@
 #pragma once
 
 #include <TNL/TypeTraits.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Algorithms/MemoryOperations.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 
 namespace TNL {
 namespace Containers {
-namespace Algorithms {
+namespace detail {
 
 template< typename Array,
           typename T,
@@ -39,7 +40,7 @@ struct ArrayAssignment< Array, T, true >
    {
       TNL_ASSERT_EQ( a.getSize(), t.getSize(), "The sizes of the arrays must be equal." );
       if( t.getSize() > 0 ) // we allow even assignment of empty arrays
-         ArrayOperations< typename Array::DeviceType, typename T::DeviceType >::template
+         Algorithms::MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template
             copy< typename Array::ValueType, typename T::ValueType, typename Array::IndexType >
             ( a.getArrayData(), t.getArrayData(), t.getSize() );
    }
@@ -60,12 +61,12 @@ struct ArrayAssignment< Array, T, false >
    static void assign( Array& a, const T& t )
    {
       TNL_ASSERT_FALSE( a.empty(), "Cannot assign value to empty array." );
-      ArrayOperations< typename Array::DeviceType >::template
+      Algorithms::MemoryOperations< typename Array::DeviceType >::template
          set< typename Array::ValueType, typename Array::IndexType >
          ( a.getArrayData(), ( typename Array::ValueType ) t, a.getSize() );
    }
 };
 
-} // namespace Algorithms
+} // namespace detail
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/ArrayIO.h b/src/TNL/Containers/detail/ArrayIO.h
similarity index 78%
rename from src/TNL/Containers/Algorithms/ArrayIO.h
rename to src/TNL/Containers/detail/ArrayIO.h
index 35d79055896bf8570e13dfcde63de3a521428308..8844a554f1915379559f274d20f3dddea692c966 100644
--- a/src/TNL/Containers/Algorithms/ArrayIO.h
+++ b/src/TNL/Containers/detail/ArrayIO.h
@@ -14,29 +14,29 @@
 
 #include <TNL/Object.h>
 #include <TNL/File.h>
+#include <TNL/TypeInfo.h>
 
 namespace TNL {
 namespace Containers {
-namespace Algorithms {
+namespace detail {
 
 template< typename Value,
-          typename Device,
           typename Index,
+          typename Allocator,
           bool Elementwise = std::is_base_of< Object, Value >::value >
 struct ArrayIO
 {};
 
 template< typename Value,
-          typename Device,
-          typename Index >
-struct ArrayIO< Value, Device, Index, true >
+          typename Index,
+          typename Allocator >
+struct ArrayIO< Value, Index, Allocator, true >
 {
    static String getSerializationType()
    {
       return String( "Containers::Array< " ) +
-             TNL::getType< Value >() + ", " +
-             Devices::Host::getDeviceType() + ", " +
-             TNL::getType< Index >() + " >";
+             TNL::getSerializationType< Value >() + ", [any_device], " +
+             TNL::getSerializationType< Index >() + ", [any_allocator] >";
    }
 
    static void save( File& file,
@@ -73,16 +73,15 @@ struct ArrayIO< Value, Device, Index, true >
 };
 
 template< typename Value,
-          typename Device,
-          typename Index >
-struct ArrayIO< Value, Device, Index, false >
+          typename Index,
+          typename Allocator >
+struct ArrayIO< Value, Index, Allocator, false >
 {
    static String getSerializationType()
    {
       return String( "Containers::Array< " ) +
-             TNL::getType< Value >() + ", " +
-             Devices::Host::getDeviceType() + ", " +
-             TNL::getType< Index >() + " >";
+             TNL::getSerializationType< Value >() + ", [any_device], " +
+             TNL::getSerializationType< Index >() + ", [any_allocator] >";
    }
 
    static void save( File& file,
@@ -93,7 +92,7 @@ struct ArrayIO< Value, Device, Index, false >
          return;
       try
       {
-         file.save< Value, Value, Device >( data, elements );
+         file.save< Value, Value, Allocator >( data, elements );
       }
       catch(...)
       {
@@ -109,7 +108,7 @@ struct ArrayIO< Value, Device, Index, false >
          return;
       try
       {
-         file.load< Value, Value, Device >( data, elements );
+         file.load< Value, Value, Allocator >( data, elements );
       }
       catch(...)
       {
@@ -118,6 +117,6 @@ struct ArrayIO< Value, Device, Index, false >
    }
 };
 
-} // namespace Algorithms
+} // namespace detail
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/StaticArrayAssignment.h b/src/TNL/Containers/detail/StaticArrayAssignment.h
similarity index 63%
rename from src/TNL/Containers/Algorithms/StaticArrayAssignment.h
rename to src/TNL/Containers/detail/StaticArrayAssignment.h
index 32a59e98c594e0875ce963dc0de57751c66b4bc7..9a8d7d3eeb7bdbcfd9814ad3754d4ba9131004b5 100644
--- a/src/TNL/Containers/Algorithms/StaticArrayAssignment.h
+++ b/src/TNL/Containers/detail/StaticArrayAssignment.h
@@ -11,33 +11,31 @@
 #pragma once
 
 #include <TNL/TypeTraits.h>
-#include <TNL/StaticFor.h>
+#include <TNL/Algorithms/StaticFor.h>
 
 namespace TNL {
 namespace Containers {
-namespace Algorithms {
+namespace detail {
 
-   namespace detail {
-      struct AssignArrayFunctor
-      {
-         template< typename LeftValue, typename RightValue >
-         __cuda_callable__
-         void operator()( int i, LeftValue& data, const RightValue& v ) const
-         {
-            data[ i ] = v[ i ];
-         }
-      };
+struct AssignArrayFunctor
+{
+   template< typename LeftValue, typename RightValue >
+   __cuda_callable__
+   void operator()( int i, LeftValue& data, const RightValue& v ) const
+   {
+      data[ i ] = v[ i ];
+   }
+};
 
-      struct AssignValueFunctor
-      {
-         template< typename LeftValue, typename RightValue >
-         __cuda_callable__
-         void operator()( int i, LeftValue& data, const RightValue& v ) const
-         {
-            data[ i ] = v;
-         }
-      };
-   } // namespace detail
+struct AssignValueFunctor
+{
+   template< typename LeftValue, typename RightValue >
+   __cuda_callable__
+   void operator()( int i, LeftValue& data, const RightValue& v ) const
+   {
+      data[ i ] = v;
+   }
+};
 
 template< typename StaticArray,
           typename T,
@@ -55,7 +53,7 @@ struct StaticArrayAssignment< StaticArray, T, true >
    static void assign( StaticArray& a, const T& v )
    {
       static_assert( StaticArray::getSize() == T::getSize(), "Cannot assign static arrays with different size." );
-      StaticFor< 0, StaticArray::getSize() >::exec( detail::AssignArrayFunctor{}, a.getData(), v );
+      Algorithms::StaticFor< 0, StaticArray::getSize() >::exec( AssignArrayFunctor{}, a.getData(), v );
    }
 };
 
@@ -70,10 +68,10 @@ struct StaticArrayAssignment< StaticArray, T, false >
    __cuda_callable__
    static void assign( StaticArray& a, const T& v )
    {
-      StaticFor< 0, StaticArray::getSize() >::exec( detail::AssignValueFunctor{}, a, v );
+      Algorithms::StaticFor< 0, StaticArray::getSize() >::exec( AssignValueFunctor{}, a, v );
    }
 };
 
-} // namespace Algorithms
+} // namespace detail
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h
similarity index 91%
rename from src/TNL/Containers/Algorithms/VectorAssignment.h
rename to src/TNL/Containers/detail/VectorAssignment.h
index c861579f4e90810b024d2d84b5ea3c2eeaf92234..fa778a2480ad36c169aec903195ff2566f766359 100644
--- a/src/TNL/Containers/Algorithms/VectorAssignment.h
+++ b/src/TNL/Containers/detail/VectorAssignment.h
@@ -11,11 +11,11 @@
 #pragma once
 
 #include <TNL/TypeTraits.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Containers {
-namespace Algorithms {
+namespace detail {
 
 /**
  * \brief Vector assignment
@@ -68,7 +68,7 @@ struct VectorAssignment< Vector, T, true >
       {
          data[ i ] = t[ i ];
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment );
    }
 };
 
@@ -103,7 +103,7 @@ struct VectorAssignment< Vector, T, false >
       {
          data[ i ] = t;
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment );
    }
 };
 
@@ -169,7 +169,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false >
       {
          data[ i ] += t[ i ];
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add );
    }
 
    __cuda_callable__
@@ -194,7 +194,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false >
       {
          data[ i ] -= t[ i ];
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract );
    }
 
    __cuda_callable__
@@ -219,7 +219,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false >
       {
          data[ i ] *= t[ i ];
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply );
    }
 
    __cuda_callable__
@@ -244,7 +244,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false >
       {
          data[ i ] /= t[ i ];
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide );
    }
 };
 
@@ -275,7 +275,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false >
       {
          data[ i ] += t;
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add );
    }
 
    __cuda_callable__
@@ -297,7 +297,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false >
       {
          data[ i ] -= t;
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract );
    }
 
    __cuda_callable__
@@ -319,7 +319,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false >
       {
          data[ i ] *= t;
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply );
    }
 
    __cuda_callable__
@@ -341,10 +341,10 @@ struct VectorAssignmentWithOperation< Vector, T, false, false >
       {
          data[ i ] /= t;
       };
-      ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide );
+      Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide );
    }
 };
 
-} // namespace Algorithms
+} // namespace detail
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/ndarray/BoundaryExecutors.h b/src/TNL/Containers/ndarray/BoundaryExecutors.h
index e4cd93705c7ae83dd36378662fa67b2e618f66eb..cf06ab1511e2179392ecee744d9589fc3ac74725 100644
--- a/src/TNL/Containers/ndarray/BoundaryExecutors.h
+++ b/src/TNL/Containers/ndarray/BoundaryExecutors.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 #include <TNL/Containers/ndarray/Meta.h>
 #include <TNL/Containers/ndarray/SizesHolder.h>
@@ -225,12 +225,12 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 3 > >
       const auto end1 = ends.template getSize< get< 1 >( Permutation{} ) >();
       const auto end2 = ends.template getSize< get< 2 >( Permutation{} ) >();
 
-      ParallelFor3D< Device >::exec( begin2,     begin1,     begin0,   skipBegin2, end1,       end0,       kernel, f );
-      ParallelFor3D< Device >::exec( skipEnd2,   begin1,     begin0,   end2,       end1,       end0,       kernel, f );
-      ParallelFor3D< Device >::exec( skipBegin2, begin1,     begin0,   skipEnd2,   skipBegin1, end0,       kernel, f );
-      ParallelFor3D< Device >::exec( skipBegin2, skipEnd1,   begin0,   skipEnd2,   end1,       end0,       kernel, f );
-      ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, begin0,   skipEnd2,   skipEnd1,   skipBegin0, kernel, f );
-      ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, skipEnd0, skipEnd2,   skipEnd1,   end0,       kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( begin2,     begin1,     begin0,   skipBegin2, end1,       end0,       kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( skipEnd2,   begin1,     begin0,   end2,       end1,       end0,       kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( skipBegin2, begin1,     begin0,   skipEnd2,   skipBegin1, end0,       kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipEnd1,   begin0,   skipEnd2,   end1,       end0,       kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, begin0,   skipEnd2,   skipEnd1,   skipBegin0, kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, skipEnd0, skipEnd2,   skipEnd1,   end0,       kernel, f );
    }
 
    template< typename __Device, typename = void >
@@ -291,10 +291,10 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 2 > >
       const auto end0 = ends.template getSize< get< 0 >( Permutation{} ) >();
       const auto end1 = ends.template getSize< get< 1 >( Permutation{} ) >();
 
-      ParallelFor2D< Device >::exec( begin1,     begin0,   skipBegin1, end0,       kernel, f );
-      ParallelFor2D< Device >::exec( skipEnd1,   begin0,   end1,       end0,       kernel, f );
-      ParallelFor2D< Device >::exec( skipBegin1, begin0,   skipEnd1,   skipBegin0, kernel, f );
-      ParallelFor2D< Device >::exec( skipBegin1, skipEnd0, skipEnd1,   end0,       kernel, f );
+      Algorithms::ParallelFor2D< Device >::exec( begin1,     begin0,   skipBegin1, end0,       kernel, f );
+      Algorithms::ParallelFor2D< Device >::exec( skipEnd1,   begin0,   end1,       end0,       kernel, f );
+      Algorithms::ParallelFor2D< Device >::exec( skipBegin1, begin0,   skipEnd1,   skipBegin0, kernel, f );
+      Algorithms::ParallelFor2D< Device >::exec( skipBegin1, skipEnd0, skipEnd1,   end0,       kernel, f );
    }
 
    template< typename __Device, typename = void >
@@ -343,8 +343,8 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 1 > >
       const auto skipEnd = skipEnds.template getSize< get< 0 >( Permutation{} ) >();
       const auto end = ends.template getSize< get< 0 >( Permutation{} ) >();
 
-      ParallelFor< Device >::exec( begin, skipBegin, f );
-      ParallelFor< Device >::exec( skipEnd, end, f );
+      Algorithms::ParallelFor< Device >::exec( begin, skipBegin, f );
+      Algorithms::ParallelFor< Device >::exec( skipEnd, end, f );
    }
 };
 
diff --git a/src/TNL/Containers/ndarray/Executors.h b/src/TNL/Containers/ndarray/Executors.h
index eff2adff3fa171460f09b9bf29ee13f90b7fcdb1..2d3db794d1a1ffb204723766d7224fcb0ac2884a 100644
--- a/src/TNL/Containers/ndarray/Executors.h
+++ b/src/TNL/Containers/ndarray/Executors.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 #include <TNL/Containers/ndarray/Meta.h>
 #include <TNL/Containers/ndarray/SizesHolder.h>
@@ -139,7 +139,7 @@ struct ParallelExecutorDeviceDispatch
       const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >();
       const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >();
       const Index end2 = ends.template getSize< get< 2 >( Permutation{} ) >();
-      ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel );
+      Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel );
    }
 };
 
@@ -168,7 +168,7 @@ struct ParallelExecutorDeviceDispatch< Permutation, Devices::Cuda >
       const Index end0 = ends.template getSize< get< Ends::getDimension() - 3 >( Permutation{} ) >();
       const Index end1 = ends.template getSize< get< Ends::getDimension() - 2 >( Permutation{} ) >();
       const Index end2 = ends.template getSize< get< Ends::getDimension() - 1 >( Permutation{} ) >();
-      ParallelFor3D< Devices::Cuda >::exec( begin2, begin1, begin0, end2, end1, end0, kernel );
+      Algorithms::ParallelFor3D< Devices::Cuda >::exec( begin2, begin1, begin0, end2, end1, end0, kernel );
    }
 };
 
@@ -214,7 +214,7 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 3 > >
       const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >();
       const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >();
       const Index end2 = ends.template getSize< get< 2 >( Permutation{} ) >();
-      ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel, f );
+      Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel, f );
    }
 
    template< typename __Device, typename = void >
@@ -265,7 +265,7 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 2 > >
       const Index begin1 = begins.template getSize< get< 1 >( Permutation{} ) >();
       const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >();
       const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >();
-      ParallelFor2D< Device >::exec( begin1, begin0, end1, end0, kernel, f );
+      Algorithms::ParallelFor2D< Device >::exec( begin1, begin0, end1, end0, kernel, f );
    }
 
    template< typename __Device, typename = void >
@@ -312,8 +312,8 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 1 > >
 
       const Index begin = begins.template getSize< get< 0 >( Permutation{} ) >();
       const Index end = ends.template getSize< get< 0 >( Permutation{} ) >();
-//      ParallelFor< Device >::exec( begin, end, kernel );
-      ParallelFor< Device >::exec( begin, end, f );
+//      Algorithms::ParallelFor< Device >::exec( begin, end, kernel );
+      Algorithms::ParallelFor< Device >::exec( begin, end, f );
    }
 };
 
diff --git a/src/TNL/Containers/ndarray/SizesHolder.h b/src/TNL/Containers/ndarray/SizesHolder.h
index c3334e19b2c4e1ff5db6706fc25e264106fea691..1375683b28d9a04ab8d0888f6e79274d79f62fe0 100644
--- a/src/TNL/Containers/ndarray/SizesHolder.h
+++ b/src/TNL/Containers/ndarray/SizesHolder.h
@@ -13,8 +13,8 @@
 #pragma once
 
 #include <TNL/Assert.h>
-#include <TNL/Devices/CudaCallable.h>
-#include <TNL/TemplateStaticFor.h>
+#include <TNL/Cuda/CudaCallable.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
 
 #include <TNL/Containers/ndarray/Meta.h>
 
@@ -231,7 +231,7 @@ SizesHolder< Index, sizes... >
 operator+( const SizesHolder< Index, sizes... >& lhs, const OtherHolder& rhs )
 {
    SizesHolder< Index, sizes... > result;
-   TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorPlusHelper >::execHost( result, lhs, rhs );
+   Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorPlusHelper >::execHost( result, lhs, rhs );
    return result;
 }
 
@@ -242,7 +242,7 @@ SizesHolder< Index, sizes... >
 operator-( const SizesHolder< Index, sizes... >& lhs, const OtherHolder& rhs )
 {
    SizesHolder< Index, sizes... > result;
-   TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorMinusHelper >::execHost( result, lhs, rhs );
+   Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorMinusHelper >::execHost( result, lhs, rhs );
    return result;
 }
 
@@ -295,9 +295,9 @@ template< typename Index,
 std::ostream& operator<<( std::ostream& str, const SizesHolder< Index, sizes... >& holder )
 {
    str << "SizesHolder< ";
-   TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, holder );
+   Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, holder );
    str << holder.template getStaticSize< sizeof...(sizes) - 1 >() << " >( ";
-   TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder );
+   Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder );
    str << holder.template getSize< sizeof...(sizes) - 1 >() << " )";
    return str;
 }
@@ -360,10 +360,10 @@ template< typename Index,
 std::ostream& operator<<( std::ostream& str, const __ndarray_impl::LocalBeginsHolder< SizesHolder< Index, sizes... >, ConstValue >& holder )
 {
    str << "LocalBeginsHolder< SizesHolder< ";
-   TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, (SizesHolder< Index, sizes... >) holder );
+   Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, (SizesHolder< Index, sizes... >) holder );
    str << holder.template getStaticSize< sizeof...(sizes) - 1 >() << " >, ";
    str << ConstValue << " >( ";
-   TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder );
+   Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder );
    str << holder.template getSize< sizeof...(sizes) - 1 >() << " )";
    return str;
 }
diff --git a/src/TNL/Containers/ndarray/SizesHolderHelpers.h b/src/TNL/Containers/ndarray/SizesHolderHelpers.h
index 9d1c0d439fe69129058d998679492a4338fc7ba3..d06c9a7a0980780c0a875bb5ef8e59a641c3810e 100644
--- a/src/TNL/Containers/ndarray/SizesHolderHelpers.h
+++ b/src/TNL/Containers/ndarray/SizesHolderHelpers.h
@@ -15,7 +15,7 @@
 #include <algorithm>
 
 #include <TNL/Assert.h>
-#include <TNL/TemplateStaticFor.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
 #include <TNL/Containers/ndarray/Meta.h>
 
 namespace TNL {
@@ -227,7 +227,7 @@ bool sizesWeakCompare( const SizesHolder1& sizes1, const SizesHolder2& sizes2 )
    static_assert( SizesHolder1::getDimension() == SizesHolder2::getDimension(),
                   "Cannot compare sizes of different dimensions." );
    bool result = true;
-   TemplateStaticFor< std::size_t, 0, SizesHolder1::getDimension(), WeakCompareHelper >::exec( sizes1, sizes2, result );
+   Algorithms::TemplateStaticFor< std::size_t, 0, SizesHolder1::getDimension(), WeakCompareHelper >::exec( sizes1, sizes2, result );
    return result;
 }
 
diff --git a/src/TNL/Cuda/CheckDevice.h b/src/TNL/Cuda/CheckDevice.h
new file mode 100644
index 0000000000000000000000000000000000000000..c857d8dd6ab8129fd2b1cac4e967831207296153
--- /dev/null
+++ b/src/TNL/Cuda/CheckDevice.h
@@ -0,0 +1,40 @@
+/***************************************************************************
+                          CheckDevice.h  -  description
+                             -------------------
+    begin                : Aug 18, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Exceptions/CudaRuntimeError.h>
+
+namespace TNL {
+namespace Cuda {
+
+#ifdef HAVE_CUDA
+   /****
+    * I do not know why, but it is more reliable to pass the error code instead
+    * of calling cudaGetLastError() inside the function.
+    * We recommend using the macro 'TNL_CHECK_CUDA_DEVICE' defined below.
+    */
+   inline void checkDevice( const char* file_name, int line, cudaError error )
+   {
+      if( error != cudaSuccess )
+         throw Exceptions::CudaRuntimeError( error, file_name, line );
+   }
+#else
+   inline void checkDevice() {}
+#endif
+
+} // namespace Cuda
+} // namespace TNL
+
+#ifdef HAVE_CUDA
+#define TNL_CHECK_CUDA_DEVICE ::TNL::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() )
+#else
+#define TNL_CHECK_CUDA_DEVICE ::TNL::Cuda::checkDevice()
+#endif
diff --git a/src/TNL/Devices/CudaCallable.h b/src/TNL/Cuda/CudaCallable.h
similarity index 76%
rename from src/TNL/Devices/CudaCallable.h
rename to src/TNL/Cuda/CudaCallable.h
index f9311443f12a0c85fb6fba9ebaf07ca47736b030..5cd3e8fbbe51abe0bd7dc525c165990b734ef388 100644
--- a/src/TNL/Devices/CudaCallable.h
+++ b/src/TNL/Cuda/CudaCallable.h
@@ -12,19 +12,14 @@
 
 // The __cuda_callable__ macro has to be in a separate header file to avoid
 // infinite loops by the #include directives.
-//
-// For example, the implementation of Devices::Cuda needs TNL_ASSERT_*
-// macros, which need __cuda_callable__ functions.
 
 /***
  * This macro serves for definition of function which are supposed to be called
  * even from device. If HAVE_CUDA is defined, the __cuda_callable__ function
  * is compiled for both CPU and GPU. If HAVE_CUDA is not defined, this macro has
- * no effect. Support for Intel Xeon Phi is now in "hibernated" state.
+ * no effect.
  */
-#ifdef HAVE_MIC 
-   #define __cuda_callable__ __attribute__((target(mic)))
-#elif HAVE_CUDA
+#ifdef HAVE_CUDA
    #define __cuda_callable__ __device__ __host__
 #else
    #define __cuda_callable__
diff --git a/src/TNL/Cuda/DeviceInfo.h b/src/TNL/Cuda/DeviceInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..d53b46fecbf45c49d7c9d6723423c6951a456fef
--- /dev/null
+++ b/src/TNL/Cuda/DeviceInfo.h
@@ -0,0 +1,52 @@
+/***************************************************************************
+                          CudaDeviceInfo.h  -  description
+                             -------------------
+    begin                : Jun 21, 2015
+    copyright            : (C) 2007 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/String.h>
+
+namespace TNL {
+namespace Cuda {
+
+struct DeviceInfo
+{
+   static int getNumberOfDevices();
+
+   static int getActiveDevice();
+
+   static String getDeviceName( int deviceNum );
+
+   static int getArchitectureMajor( int deviceNum );
+
+   static int getArchitectureMinor( int deviceNum );
+
+   static int getClockRate( int deviceNum );
+
+   static std::size_t getGlobalMemory( int deviceNum );
+
+   static std::size_t getFreeGlobalMemory();
+
+   static int getMemoryClockRate( int deviceNum );
+
+   static bool getECCEnabled( int deviceNum );
+
+   static int getCudaMultiprocessors( int deviceNum );
+
+   static int getCudaCoresPerMultiprocessors( int deviceNum );
+
+   static int getCudaCores( int deviceNum );
+
+   static int getRegistersPerMultiprocessor( int deviceNum );
+};
+
+} // namespace Cuda
+} // namespace TNL
+
+#include <TNL/Cuda/DeviceInfo.hpp>
diff --git a/src/TNL/Devices/CudaDeviceInfo_impl.h b/src/TNL/Cuda/DeviceInfo.hpp
similarity index 86%
rename from src/TNL/Devices/CudaDeviceInfo_impl.h
rename to src/TNL/Cuda/DeviceInfo.hpp
index f29ecd8c91493edb538f11a8ced6c9ee6503983a..d10e6f05cbbb391d06e1287ad6f19db30c967ac0 100644
--- a/src/TNL/Devices/CudaDeviceInfo_impl.h
+++ b/src/TNL/Cuda/DeviceInfo.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          CudaDeviceInfo_impl.h  -  description
+                          DeviceInfo.hpp  -  description
                              -------------------
     begin                : Jun 21, 2015
     copyright            : (C) 2007 by Tomas Oberhuber
@@ -12,14 +12,14 @@
 
 #include <unordered_map>
 
-#include <TNL/Devices/CudaDeviceInfo.h>
+#include <TNL/Cuda/DeviceInfo.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 
 namespace TNL {
-namespace Devices {
+namespace Cuda {
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getNumberOfDevices()
 {
 #ifdef HAVE_CUDA
@@ -32,7 +32,7 @@ getNumberOfDevices()
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getActiveDevice()
 {
 #ifdef HAVE_CUDA
@@ -45,7 +45,7 @@ getActiveDevice()
 }
 
 inline String
-CudaDeviceInfo::
+DeviceInfo::
 getDeviceName( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -58,7 +58,7 @@ getDeviceName( int deviceNum )
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getArchitectureMajor( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -71,7 +71,7 @@ getArchitectureMajor( int deviceNum )
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getArchitectureMinor( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -84,7 +84,7 @@ getArchitectureMinor( int deviceNum )
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getClockRate( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -96,8 +96,8 @@ getClockRate( int deviceNum )
 #endif
 }
 
-inline size_t
-CudaDeviceInfo::
+inline std::size_t
+DeviceInfo::
 getGlobalMemory( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -109,13 +109,13 @@ getGlobalMemory( int deviceNum )
 #endif
 }
 
-inline size_t
-CudaDeviceInfo::
+inline std::size_t
+DeviceInfo::
 getFreeGlobalMemory()
 {
 #ifdef HAVE_CUDA
-   size_t free = 0;
-   size_t total = 0;
+   std::size_t free = 0;
+   std::size_t total = 0;
    cudaMemGetInfo( &free, &total );
    return free;
 #else
@@ -124,7 +124,7 @@ getFreeGlobalMemory()
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getMemoryClockRate( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -137,7 +137,7 @@ getMemoryClockRate( int deviceNum )
 }
 
 inline bool
-CudaDeviceInfo::
+DeviceInfo::
 getECCEnabled( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -150,7 +150,7 @@ getECCEnabled( int deviceNum )
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getCudaMultiprocessors( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -169,12 +169,12 @@ getCudaMultiprocessors( int deviceNum )
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getCudaCoresPerMultiprocessors( int deviceNum )
 {
 #ifdef HAVE_CUDA
-   int major = CudaDeviceInfo::getArchitectureMajor( deviceNum );
-   int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum );
+   int major = DeviceInfo::getArchitectureMajor( deviceNum );
+   int minor = DeviceInfo::getArchitectureMinor( deviceNum );
    switch( major )
    {
       case 1:   // Tesla generation, G80, G8x, G9x classes
@@ -209,19 +209,19 @@ getCudaCoresPerMultiprocessors( int deviceNum )
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getCudaCores( int deviceNum )
 {
 #ifdef HAVE_CUDA
-   return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) *
-          CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
+   return DeviceInfo::getCudaMultiprocessors( deviceNum ) *
+          DeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
 #else
    throw Exceptions::CudaSupportMissing();
 #endif
 }
 
 inline int
-CudaDeviceInfo::
+DeviceInfo::
 getRegistersPerMultiprocessor( int deviceNum )
 {
 #ifdef HAVE_CUDA
@@ -239,5 +239,5 @@ getRegistersPerMultiprocessor( int deviceNum )
 #endif
 }
 
-} // namespace Devices
+} // namespace Cuda
 } // namespace TNL
diff --git a/src/TNL/Cuda/LaunchHelpers.h b/src/TNL/Cuda/LaunchHelpers.h
new file mode 100644
index 0000000000000000000000000000000000000000..6e5d3c9757601afaa5f9d9c2be45593298f7ab12
--- /dev/null
+++ b/src/TNL/Cuda/LaunchHelpers.h
@@ -0,0 +1,170 @@
+/***************************************************************************
+                          LaunchHelpers.h  -  description
+                             -------------------
+    begin                : Aug 19, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Math.h>
+
+namespace TNL {
+namespace Cuda {
+
+inline constexpr int getMaxGridSize()
+{
+   return 65535;
+}
+
+inline constexpr int getMaxBlockSize()
+{
+   return 1024;
+}
+
+inline constexpr int getWarpSize()
+{
+   return 32;
+}
+
+// When we transfer data between the GPU and the CPU we use 1 MiB buffer. This
+// size should ensure good performance.
+// We use the same buffer size even for retyping data during IO operations.
+inline constexpr int getTransferBufferSize()
+{
+   return 1 << 20;
+}
+
+#ifdef HAVE_CUDA
+__device__ inline int getGlobalThreadIdx( const int gridIdx = 0,
+                                          const int gridSize = getMaxGridSize() )
+{
+   return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x;
+}
+
+__device__ inline int getGlobalThreadIdx_x( const dim3& gridIdx )
+{
+   return ( gridIdx.x * getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+}
+
+__device__ inline int getGlobalThreadIdx_y( const dim3& gridIdx )
+{
+   return ( gridIdx.y * getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+}
+
+__device__ inline int getGlobalThreadIdx_z( const dim3& gridIdx )
+{
+   return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
+}
+#endif
+
+inline int getNumberOfBlocks( const int threads,
+                              const int blockSize )
+{
+   return roundUpDivision( threads, blockSize );
+}
+
+inline int getNumberOfGrids( const int blocks,
+                             const int gridSize = getMaxGridSize() )
+{
+   return roundUpDivision( blocks, gridSize );
+}
+
+#ifdef HAVE_CUDA
+inline void setupThreads( const dim3& blockSize,
+                          dim3& blocksCount,
+                          dim3& gridsCount,
+                          long long int xThreads,
+                          long long int yThreads = 0,
+                          long long int zThreads = 0 )
+{
+   blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) );
+   blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) );
+   blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) );
+
+   /****
+    * TODO: Fix the following:
+    * I do not known how to get max grid size in kernels :(
+    *
+    * Also, this is very slow. */
+   /*int currentDevice( 0 );
+   cudaGetDevice( currentDevice );
+   cudaDeviceProp properties;
+   cudaGetDeviceProperties( &properties, currentDevice );
+   gridsCount.x = blocksCount.x / properties.maxGridSize[ 0 ] + ( blocksCount.x % properties.maxGridSize[ 0 ] != 0 );
+   gridsCount.y = blocksCount.y / properties.maxGridSize[ 1 ] + ( blocksCount.y % properties.maxGridSize[ 1 ] != 0 );
+   gridsCount.z = blocksCount.z / properties.maxGridSize[ 2 ] + ( blocksCount.z % properties.maxGridSize[ 2 ] != 0 );
+   */
+   gridsCount.x = blocksCount.x / getMaxGridSize() + ( blocksCount.x % getMaxGridSize() != 0 );
+   gridsCount.y = blocksCount.y / getMaxGridSize() + ( blocksCount.y % getMaxGridSize() != 0 );
+   gridsCount.z = blocksCount.z / getMaxGridSize() + ( blocksCount.z % getMaxGridSize() != 0 );
+}
+
+inline void setupGrid( const dim3& blocksCount,
+                       const dim3& gridsCount,
+                       const dim3& gridIdx,
+                       dim3& gridSize )
+{
+   /* TODO: this is extremely slow!!!!
+   int currentDevice( 0 );
+   cudaGetDevice( &currentDevice );
+   cudaDeviceProp properties;
+   cudaGetDeviceProperties( &properties, currentDevice );*/
+
+   /****
+    * TODO: fix the following
+   if( gridIdx.x < gridsCount.x )
+      gridSize.x = properties.maxGridSize[ 0 ];
+   else
+      gridSize.x = blocksCount.x % properties.maxGridSize[ 0 ];
+
+   if( gridIdx.y < gridsCount.y )
+      gridSize.y = properties.maxGridSize[ 1 ];
+   else
+      gridSize.y = blocksCount.y % properties.maxGridSize[ 1 ];
+
+   if( gridIdx.z < gridsCount.z )
+      gridSize.z = properties.maxGridSize[ 2 ];
+   else
+      gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/
+
+   if( gridIdx.x < gridsCount.x - 1 )
+      gridSize.x = getMaxGridSize();
+   else
+      gridSize.x = blocksCount.x % getMaxGridSize();
+
+   if( gridIdx.y < gridsCount.y - 1 )
+      gridSize.y = getMaxGridSize();
+   else
+      gridSize.y = blocksCount.y % getMaxGridSize();
+
+   if( gridIdx.z < gridsCount.z - 1 )
+      gridSize.z = getMaxGridSize();
+   else
+      gridSize.z = blocksCount.z % getMaxGridSize();
+}
+
+inline std::ostream& operator<<( std::ostream& str, const dim3& d )
+{
+   str << "( " << d.x << ", " << d.y << ", " << d.z << " )";
+   return str;
+}
+
+inline void printThreadsSetup( const dim3& blockSize,
+                               const dim3& blocksCount,
+                               const dim3& gridSize,
+                               const dim3& gridsCount,
+                               std::ostream& str = std::cout )
+{
+   str << "Block size: " << blockSize << std::endl
+       << " Blocks count: " << blocksCount << std::endl
+       << " Grid size: " << gridSize << std::endl
+       << " Grids count: " << gridsCount << std::endl;
+}
+#endif
+
+} // namespace Cuda
+} // namespace TNL
diff --git a/src/TNL/Cuda/MemoryHelpers.h b/src/TNL/Cuda/MemoryHelpers.h
new file mode 100644
index 0000000000000000000000000000000000000000..cb214f5d02ebaf6784d08e4c288de6ddd8638de7
--- /dev/null
+++ b/src/TNL/Cuda/MemoryHelpers.h
@@ -0,0 +1,103 @@
+/***************************************************************************
+                          MemoryHelpers.h  -  description
+                             -------------------
+    begin                : Aug 19, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <iostream>
+
+#include <TNL/Cuda/CheckDevice.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+
+namespace TNL {
+namespace Cuda {
+
+template< typename ObjectType >
+[[deprecated("Allocators and MemoryOperations should be used instead.")]]
+ObjectType* passToDevice( const ObjectType& object )
+{
+#ifdef HAVE_CUDA
+   ObjectType* deviceObject;
+   if( cudaMalloc( ( void** ) &deviceObject,
+                   ( size_t ) sizeof( ObjectType ) ) != cudaSuccess )
+      throw Exceptions::CudaBadAlloc();
+   if( cudaMemcpy( ( void* ) deviceObject,
+                   ( void* ) &object,
+                   sizeof( ObjectType ),
+                   cudaMemcpyHostToDevice ) != cudaSuccess )
+   {
+      TNL_CHECK_CUDA_DEVICE;
+      cudaFree( ( void* ) deviceObject );
+      TNL_CHECK_CUDA_DEVICE;
+      return 0;
+   }
+   return deviceObject;
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+template< typename ObjectType >
+[[deprecated("Allocators and MemoryOperations should be used instead.")]]
+ObjectType passFromDevice( const ObjectType* object )
+{
+#ifdef HAVE_CUDA
+   ObjectType aux;
+   cudaMemcpy( ( void* ) &aux,
+               ( const void* ) object,
+               sizeof( ObjectType ),
+               cudaMemcpyDeviceToHost );
+   TNL_CHECK_CUDA_DEVICE;
+   return aux;
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+template< typename ObjectType >
+[[deprecated("Allocators and MemoryOperations should be used instead.")]]
+void passFromDevice( const ObjectType* deviceObject,
+                     ObjectType& hostObject )
+{
+#ifdef HAVE_CUDA
+   cudaMemcpy( ( void* ) &hostObject,
+               ( void* ) deviceObject,
+               sizeof( ObjectType ),
+               cudaMemcpyDeviceToHost );
+   TNL_CHECK_CUDA_DEVICE;
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+template< typename ObjectType >
+[[deprecated("Allocators and MemoryOperations should be used instead.")]]
+void freeFromDevice( ObjectType* deviceObject )
+{
+#ifdef HAVE_CUDA
+   cudaFree( ( void* ) deviceObject );
+   TNL_CHECK_CUDA_DEVICE;
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+template< typename ObjectType >
+void print( const ObjectType* deviceObject, std::ostream& str = std::cout )
+{
+#ifdef HAVE_CUDA
+   ObjectType hostObject;
+   passFromDevice( deviceObject, hostObject );
+   str << hostObject;
+#endif
+}
+
+} // namespace Cuda
+} // namespace TNL
diff --git a/src/TNL/CudaSharedMemory.h b/src/TNL/Cuda/SharedMemory.h
similarity index 78%
rename from src/TNL/CudaSharedMemory.h
rename to src/TNL/Cuda/SharedMemory.h
index ec9a43c207fc7962f36ffb40ad0af71973b76868..29851952c01c86356e6872511f90496358b55152 100644
--- a/src/TNL/CudaSharedMemory.h
+++ b/src/TNL/Cuda/SharedMemory.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          CudaSharedMemory.h  -  description
+                          SharedMemory.h  -  description
                              -------------------
     begin                : Oct 18, 2017
     copyright            : (C) 2017 by Tomas Oberhuber et al.
@@ -26,11 +26,11 @@
  *
  * Until CUDA 8.0, it was possible to use reinterpret_cast this way:
  *
- *    template< typename Element, size_t Alignment >
- *    __device__ Element* Cuda::getSharedMemory()
+ *    template< typename T, size_t Alignment >
+ *    __device__ T* getSharedMemory()
  *    {
  *       extern __shared__ __align__ ( Alignment ) unsigned char __sdata[];
- *       return reinterpret_cast< Element* >( __sdata );
+ *       return reinterpret_cast< T* >( __sdata );
  *    }
  *
  * But since CUDA 9.0 there is a new restriction that the alignment of the
@@ -44,12 +44,13 @@
 #include <stdint.h>
 
 namespace TNL {
+namespace Cuda {
 
 template< typename T, std::size_t _alignment = CHAR_BIT * sizeof(T) >
-struct CudaSharedMemory {};
+struct SharedMemory;
 
 template< typename T >
-struct CudaSharedMemory< T, 8 >
+struct SharedMemory< T, 8 >
 {
    __device__ inline operator T* ()
    {
@@ -65,7 +66,7 @@ struct CudaSharedMemory< T, 8 >
 };
 
 template< typename T >
-struct CudaSharedMemory< T, 16 >
+struct SharedMemory< T, 16 >
 {
    __device__ inline operator T* ()
    {
@@ -81,7 +82,7 @@ struct CudaSharedMemory< T, 16 >
 };
 
 template< typename T >
-struct CudaSharedMemory< T, 32 >
+struct SharedMemory< T, 32 >
 {
    __device__ inline operator T* ()
    {
@@ -97,7 +98,7 @@ struct CudaSharedMemory< T, 32 >
 };
 
 template< typename T >
-struct CudaSharedMemory< T, 64 >
+struct SharedMemory< T, 64 >
 {
    __device__ inline operator T* ()
    {
@@ -112,6 +113,25 @@ struct CudaSharedMemory< T, 64 >
    }
 };
 
+template< typename T >
+__device__ inline T* getSharedMemory()
+{
+   return SharedMemory< T >{};
+}
+
+// helper functions for indexing shared memory
+inline constexpr int getNumberOfSharedMemoryBanks()
+{
+   return 32;
+}
+
+template< typename Index >
+__device__ Index getInterleaving( const Index index )
+{
+   return index + index / Cuda::getNumberOfSharedMemoryBanks();
+}
+
+} // namespace Cuda
 } // namespace TNL
 
 #endif
diff --git a/src/TNL/CudaStreamPool.h b/src/TNL/Cuda/StreamPool.h
similarity index 73%
rename from src/TNL/CudaStreamPool.h
rename to src/TNL/Cuda/StreamPool.h
index 1dd2b7907fe39b53e331b0147fff1cabe16424ef..59bf38a5791d2ae9f381e831559c5caa7788567a 100644
--- a/src/TNL/CudaStreamPool.h
+++ b/src/TNL/Cuda/StreamPool.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          CudaStreamPool.h  -  description
+                          StreamPool.h  -  description
                              -------------------
     begin                : Oct 14, 2016
     copyright            : (C) 2016 by Tomas Oberhuber et al.
@@ -15,22 +15,20 @@
 #include <stdlib.h>
 #include <unordered_map>
 
-#include <TNL/Devices/Host.h>
-#include <TNL/Devices/Cuda.h>
-
 namespace TNL {
+namespace Cuda {
 
 #ifdef HAVE_CUDA
-class CudaStreamPool
+class StreamPool
 {
    public:
       // stop the compiler generating methods of copy the object
-      CudaStreamPool( CudaStreamPool const& copy ) = delete;
-      CudaStreamPool& operator=( CudaStreamPool const& copy ) = delete;
+      StreamPool( StreamPool const& copy ) = delete;
+      StreamPool& operator=( StreamPool const& copy ) = delete;
 
-      inline static CudaStreamPool& getInstance()
+      inline static StreamPool& getInstance()
       {
-         static CudaStreamPool instance;
+         static StreamPool instance;
          return instance;
       }
 
@@ -47,14 +45,14 @@ class CudaStreamPool
 
    private:
       // private constructor of the singleton
-      inline CudaStreamPool()
+      inline StreamPool()
       {
-         atexit( CudaStreamPool::free_atexit );
+         atexit( StreamPool::free_atexit );
       }
 
       inline static void free_atexit( void )
       {
-         CudaStreamPool::getInstance().free();
+         StreamPool::getInstance().free();
       }
 
    protected:
@@ -70,5 +68,6 @@ class CudaStreamPool
 };
 #endif
 
+} // namespace Cuda
 } // namespace TNL
 
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index 7831014155e9a730c1be101c47cb2602cd8d3179..2b3bf8c6660b1f4e4c2d7310c24f25ea047beea6 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -10,13 +10,7 @@
 
 #pragma once
 
-#include <iostream>
-
 #include <TNL/String.h>
-#include <TNL/Assert.h>
-#include <TNL/Pointers/SmartPointersRegister.h>
-#include <TNL/Timer.h>
-#include <TNL/Devices/CudaCallable.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
 
@@ -25,188 +19,30 @@ namespace Devices {
 
 class Cuda
 {
-   public:
-
-   static inline String getDeviceType();
-
-   // TODO: Remove getDeviceType();
-   static inline String getType() { return getDeviceType();};
-
-   static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" );
-
-   static inline bool setup( const Config::ParameterContainer& parameters,
-                             const String& prefix = "" );
-
-   __cuda_callable__ static inline constexpr int getMaxGridSize();
-
-   __cuda_callable__ static inline constexpr int getMaxBlockSize();
-
-   __cuda_callable__ static inline constexpr int getWarpSize();
-
-   __cuda_callable__ static inline constexpr int getNumberOfSharedMemoryBanks();
-
-   static inline constexpr int getGPUTransferBufferSize();
-
-#ifdef HAVE_CUDA
-   /***
-    * This function is obsolete and should be replaced by the following functions.
-    */
-   __device__ static inline int
-   getGlobalThreadIdx( const int gridIdx = 0,
-                       const int gridSize = getMaxGridSize() );   
-
-   __device__ static inline int
-   getGlobalThreadIdx_x( const dim3& gridIdx );
-
-   __device__ static inline int
-   getGlobalThreadIdx_y( const dim3& gridIdx );
-
-   __device__ static inline int
-   getGlobalThreadIdx_z( const dim3& gridIdx );   
-#endif
-
-   /****
-    * This functions helps to count number of CUDA blocks depending on the 
-    * number of the CUDA threads and the block size.
-    * It is obsolete and it will be replaced by setupThreads.
-    */
-   static inline int getNumberOfBlocks( const int threads,
-                                        const int blockSize );
-
-   /****
-    * This functions helps to count number of CUDA grids depending on the 
-    * number of the CUDA blocks and maximum grid size.
-    * It is obsolete and it will be replaced by setupThreads.
-    */
-   static inline int getNumberOfGrids( const int blocks,
-                                       const int gridSize = getMaxGridSize() );
-   
-#ifdef HAVE_CUDA   
-   /*! This method sets up gridSize and computes number of grids depending
-    *  on total number of CUDA threads.
-    */
-   static void setupThreads( const dim3& blockSize,
-                             dim3& blocksCount,
-                             dim3& gridsCount,
-                             long long int xThreads,
-                             long long int yThreads = 0,
-                             long long int zThreads = 0 );
-   
-   /*! This method sets up grid size when one iterates over more grids.
-    * If gridIdx.? < gridsCount.? then the gridSize.? is set to maximum
-    * allowed by CUDA. Otherwise gridSize.? is set to the size of the grid
-    * in the last loop i.e. blocksCount.? % maxGridSize.?.
-    */
-   static void setupGrid( const dim3& blocksCount,
-                          const dim3& gridsCount,
-                          const dim3& gridIdx,
-                          dim3& gridSize );
-   
-   static void printThreadsSetup( const dim3& blockSize,
-                                  const dim3& blocksCount,
-                                  const dim3& gridSize,
-                                  const dim3& gridsCount,
-                                  std::ostream& str = std::cout );
-#endif   
-
-   template< typename ObjectType >
-   static ObjectType* passToDevice( const ObjectType& object );
-
-   template< typename ObjectType >
-   static ObjectType passFromDevice( const ObjectType* object );
-
-   template< typename ObjectType >
-   static void passFromDevice( const ObjectType* deviceObject,
-                               ObjectType& hostObject );
-
-   template< typename ObjectType >
-   static void freeFromDevice( ObjectType* object );
-
-   template< typename ObjectType >
-   static void print( const ObjectType* object, std::ostream& str = std::cout );
-
+public:
+   static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
+   {
 #ifdef HAVE_CUDA
-   template< typename Index >
-   static __device__ Index getInterleaving( const Index index );
-
-   /****
-    * Declaration of variables for dynamic shared memory is difficult in
-    * templated functions. For example, the following does not work for
-    * different types T:
-    *
-    *    template< typename T >
-    *    void foo()
-    *    {
-    *        extern __shared__ T shx[];
-    *    }
-    *
-    * This is because extern variables must be declared exactly once. In
-    * templated functions we need to have same variable name with different
-    * type, which causes the conflict. In CUDA samples they solve the problem
-    * using template specialization via classes, but using one base type and
-    * reinterpret_cast works too.
-    * See http://stackoverflow.com/a/19339004/4180822 for reference.
-    */
-   template< typename Element >
-   static __device__ Element* getSharedMemory();
-#endif
-
-#ifdef HAVE_CUDA
-   /****
-    * I do not know why, but it is more reliable to pass the error code instead
-    * of calling cudaGetLastError() inside the method.
-    * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow.
-    */
-   static inline void checkDevice( const char* file_name, int line, cudaError error );
-#else
-   static inline void checkDevice() {}
-#endif
-
-   static inline void insertSmartPointer( Pointers::SmartPointer* pointer );
-
-   static inline void removeSmartPointer( Pointers::SmartPointer* pointer );
-
-   // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be
-   // called to get the device ID.
-   static inline bool synchronizeDevice( int deviceId = -1 );
-
-   static inline Timer& getSmartPointersSynchronizationTimer();
-
-   ////
-   // When we transfer data between the GPU and the CPU we use 5 MB buffer. This
-   // size should ensure good performance -- see.
-   // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
-   // We use the same buffer size even for retyping data during IO operations.
-   //
-   static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
-
-
-   protected:
-
-   static inline Pointers::SmartPointersRegister& getSmartPointersRegister();
-};
-
-#ifdef HAVE_CUDA
-#define TNL_CHECK_CUDA_DEVICE ::TNL::Devices::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() )
+      config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation.", 0 );
 #else
-#define TNL_CHECK_CUDA_DEVICE ::TNL::Devices::Cuda::checkDevice()
-#endif
-
-#ifdef HAVE_CUDA
-namespace {
-   std::ostream& operator << ( std::ostream& str, const dim3& d );
-}
+      config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 );
 #endif
+   }
 
+   static inline bool setup( const Config::ParameterContainer& parameters,
+                             const String& prefix = "" )
+   {
 #ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-namespace {
-   __device__ double atomicAdd(double* address, double val);
-}
-#endif
+      int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" );
+      if( cudaSetDevice( cudaDevice ) != cudaSuccess )
+      {
+         std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl;
+         return false;
+      }
 #endif
+      return true;
+   }
+};
 
 } // namespace Devices
 } // namespace TNL
-
-#include <TNL/Devices/Cuda_impl.h>
diff --git a/src/TNL/Devices/CudaDeviceInfo.h b/src/TNL/Devices/CudaDeviceInfo.h
deleted file mode 100644
index 9eefe3bad8932670af271204e03f72b5eb501a95..0000000000000000000000000000000000000000
--- a/src/TNL/Devices/CudaDeviceInfo.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/***************************************************************************
-                          CudaDeviceInfo.h  -  description
-                             -------------------
-    begin                : Jun 21, 2015
-    copyright            : (C) 2007 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <stdlib.h>
-
-#include <TNL/String.h>
-
-namespace TNL {
-namespace Devices {
-
-class CudaDeviceInfo
-{
-   public:
-
-      static int getNumberOfDevices();
-
-      static int getActiveDevice();
-
-      static String getDeviceName( int deviceNum );
-
-      static int getArchitectureMajor( int deviceNum );
-
-      static int getArchitectureMinor( int deviceNum );
-
-      static int getClockRate( int deviceNum );
-
-      static size_t getGlobalMemory( int deviceNum );
-
-      static size_t getFreeGlobalMemory();
-
-      static int getMemoryClockRate( int deviceNum );
-
-      static bool getECCEnabled( int deviceNum );
-
-      static int getCudaMultiprocessors( int deviceNum );
-
-      static int getCudaCoresPerMultiprocessors( int deviceNum );
-
-      static int getCudaCores( int deviceNum );
-
-      static int getRegistersPerMultiprocessor( int deviceNum );
-};
-
-} // namespace Devices
-} // namespace TNL
-
-#include <TNL/Devices/CudaDeviceInfo_impl.h>
diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h
deleted file mode 100644
index 07e2c1ddcba83bd07405e5cbcdc726e8d56ba6f7..0000000000000000000000000000000000000000
--- a/src/TNL/Devices/Cuda_impl.h
+++ /dev/null
@@ -1,384 +0,0 @@
-/***************************************************************************
-                          Cuda_impl.h  -  description
-                             -------------------
-    begin                : Jan 21, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Math.h>
-#include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
-#include <TNL/Exceptions/CudaBadAlloc.h>
-#include <TNL/Exceptions/CudaSupportMissing.h>
-#include <TNL/Exceptions/CudaRuntimeError.h>
-#include <TNL/CudaSharedMemory.h>
-
-namespace TNL {
-namespace Devices {
-
-inline String Cuda::getDeviceType()
-{
-   return String( "Devices::Cuda" );
-}
-
-inline void
-Cuda::configSetup( Config::ConfigDescription& config,
-                   const String& prefix )
-{
-#ifdef HAVE_CUDA
-   config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation.", 0 );
-#else
-   config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 );
-#endif
-}
-
-inline bool
-Cuda::setup( const Config::ParameterContainer& parameters,
-             const String& prefix )
-{
-#ifdef HAVE_CUDA
-   int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" );
-   if( cudaSetDevice( cudaDevice ) != cudaSuccess )
-   {
-      std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl;
-      return false;
-   }
-   getSmartPointersSynchronizationTimer().reset();
-   getSmartPointersSynchronizationTimer().stop();
-#endif
-   return true;
-}
-
-__cuda_callable__
-inline constexpr int Cuda::getMaxGridSize()
-{
-   return 65535;
-}
-
-__cuda_callable__
-inline constexpr int Cuda::getMaxBlockSize()
-{
-   return 1024;
-}
-
-__cuda_callable__
-inline constexpr int Cuda::getWarpSize()
-{
-   return 32;
-}
-
-__cuda_callable__
-inline constexpr int Cuda::getNumberOfSharedMemoryBanks()
-{
-   return 32;
-}
-
-inline constexpr int Cuda::getGPUTransferBufferSize()
-{
-   return 1 << 20;
-}
-
-#ifdef HAVE_CUDA
-__device__ inline int Cuda::getGlobalThreadIdx( const int gridIdx, const int gridSize )
-{
-   return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x;
-}
-
-__device__ inline int Cuda::getGlobalThreadIdx_x( const dim3& gridIdx )
-{
-   return ( gridIdx.x * getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-}
-
-__device__ inline int Cuda::getGlobalThreadIdx_y( const dim3& gridIdx )
-{
-   return ( gridIdx.y * getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-}
-
-__device__ inline int Cuda::getGlobalThreadIdx_z( const dim3& gridIdx )
-{
-   return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
-}
-#endif
-
-inline int Cuda::getNumberOfBlocks( const int threads,
-                                    const int blockSize )
-{
-   return roundUpDivision( threads, blockSize );
-}
-
-inline int Cuda::getNumberOfGrids( const int blocks,
-                                   const int gridSize )
-{
-   return roundUpDivision( blocks, gridSize );
-}
-
-#ifdef HAVE_CUDA
-inline void Cuda::setupThreads( const dim3& blockSize,
-                                dim3& blocksCount,
-                                dim3& gridsCount,
-                                long long int xThreads,
-                                long long int yThreads,
-                                long long int zThreads )
-{
-   blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) );
-   blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) );
-   blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) );
-   
-   /****
-    * TODO: Fix the following:
-    * I do not known how to get max grid size in kernels :(
-    * 
-    * Also, this is very slow. */
-   /*int currentDevice( 0 );
-   cudaGetDevice( currentDevice );
-   cudaDeviceProp properties;
-   cudaGetDeviceProperties( &properties, currentDevice );
-   gridsCount.x = blocksCount.x / properties.maxGridSize[ 0 ] + ( blocksCount.x % properties.maxGridSize[ 0 ] != 0 );
-   gridsCount.y = blocksCount.y / properties.maxGridSize[ 1 ] + ( blocksCount.y % properties.maxGridSize[ 1 ] != 0 );
-   gridsCount.z = blocksCount.z / properties.maxGridSize[ 2 ] + ( blocksCount.z % properties.maxGridSize[ 2 ] != 0 );
-   */
-   gridsCount.x = blocksCount.x / getMaxGridSize() + ( blocksCount.x % getMaxGridSize() != 0 );
-   gridsCount.y = blocksCount.y / getMaxGridSize() + ( blocksCount.y % getMaxGridSize() != 0 );
-   gridsCount.z = blocksCount.z / getMaxGridSize() + ( blocksCount.z % getMaxGridSize() != 0 );
-}
-
-inline void Cuda::setupGrid( const dim3& blocksCount,
-                             const dim3& gridsCount,
-                             const dim3& gridIdx,
-                             dim3& gridSize )
-{
-   /* TODO: this is extremely slow!!!!
-   int currentDevice( 0 );
-   cudaGetDevice( &currentDevice );
-   cudaDeviceProp properties;
-   cudaGetDeviceProperties( &properties, currentDevice );*/
- 
-   /****
-    * TODO: fix the following
-   if( gridIdx.x < gridsCount.x )
-      gridSize.x = properties.maxGridSize[ 0 ];
-   else
-      gridSize.x = blocksCount.x % properties.maxGridSize[ 0 ];
-   
-   if( gridIdx.y < gridsCount.y )
-      gridSize.y = properties.maxGridSize[ 1 ];
-   else
-      gridSize.y = blocksCount.y % properties.maxGridSize[ 1 ];
-
-   if( gridIdx.z < gridsCount.z )
-      gridSize.z = properties.maxGridSize[ 2 ];
-   else
-      gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/
-   
-   if( gridIdx.x < gridsCount.x - 1 )
-      gridSize.x = getMaxGridSize();
-   else
-      gridSize.x = blocksCount.x % getMaxGridSize();
-   
-   if( gridIdx.y < gridsCount.y - 1 )
-      gridSize.y = getMaxGridSize();
-   else
-      gridSize.y = blocksCount.y % getMaxGridSize();
-
-   if( gridIdx.z < gridsCount.z - 1 )
-      gridSize.z = getMaxGridSize();
-   else
-      gridSize.z = blocksCount.z % getMaxGridSize();
-}
-
-inline void Cuda::printThreadsSetup( const dim3& blockSize,
-                                     const dim3& blocksCount,
-                                     const dim3& gridSize,
-                                     const dim3& gridsCount,
-                                     std::ostream& str )
-{
-   str << "Block size: " << blockSize << std::endl
-       << " Blocks count: " << blocksCount << std::endl
-       << " Grid size: " << gridSize << std::endl
-       << " Grids count: " << gridsCount << std::endl;
-}
-#endif
-
-
-template< typename ObjectType >
-ObjectType* Cuda::passToDevice( const ObjectType& object )
-{
-#ifdef HAVE_CUDA
-   ObjectType* deviceObject;
-   if( cudaMalloc( ( void** ) &deviceObject,
-                   ( size_t ) sizeof( ObjectType ) ) != cudaSuccess )
-      throw Exceptions::CudaBadAlloc();
-   if( cudaMemcpy( ( void* ) deviceObject,
-                   ( void* ) &object,
-                   sizeof( ObjectType ),
-                   cudaMemcpyHostToDevice ) != cudaSuccess )
-   {
-      TNL_CHECK_CUDA_DEVICE;
-      cudaFree( ( void* ) deviceObject );
-      TNL_CHECK_CUDA_DEVICE;
-      return 0;
-   }
-   return deviceObject;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-template< typename ObjectType >
-ObjectType Cuda::passFromDevice( const ObjectType* object )
-{
-#ifdef HAVE_CUDA
-   ObjectType aux;
-   cudaMemcpy( ( void* ) aux,
-               ( void* ) &object,
-               sizeof( ObjectType ),
-               cudaMemcpyDeviceToHost );
-   TNL_CHECK_CUDA_DEVICE;
-   return aux;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-template< typename ObjectType >
-void Cuda::passFromDevice( const ObjectType* deviceObject,
-                           ObjectType& hostObject )
-{
-#ifdef HAVE_CUDA
-   cudaMemcpy( ( void* ) &hostObject,
-               ( void* ) deviceObject,
-               sizeof( ObjectType ),
-               cudaMemcpyDeviceToHost );
-   TNL_CHECK_CUDA_DEVICE;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-template< typename ObjectType >
-void Cuda::print( const ObjectType* deviceObject, std::ostream& str )
-{
-#ifdef HAVE_CUDA
-   ObjectType hostObject;
-   passFromDevice( deviceObject, hostObject );
-   str << hostObject;
-#endif
-}
-
-
-template< typename ObjectType >
-void Cuda::freeFromDevice( ObjectType* deviceObject )
-{
-#ifdef HAVE_CUDA
-   cudaFree( ( void* ) deviceObject );
-   TNL_CHECK_CUDA_DEVICE;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-#ifdef HAVE_CUDA
-template< typename Index >
-__device__ Index Cuda::getInterleaving( const Index index )
-{
-   return index + index / Cuda::getNumberOfSharedMemoryBanks();
-}
-
-template< typename Element >
-__device__ Element* Cuda::getSharedMemory()
-{
-   return CudaSharedMemory< Element >();
-}
-#endif
-
-#ifdef HAVE_CUDA
-inline void Cuda::checkDevice( const char* file_name, int line, cudaError error )
-{
-   if( error != cudaSuccess )
-      throw Exceptions::CudaRuntimeError( error, file_name, line );
-}
-#endif
-
-inline void Cuda::insertSmartPointer( Pointers::SmartPointer* pointer )
-{
-   getSmartPointersRegister().insert( pointer, Devices::CudaDeviceInfo::getActiveDevice() );
-}
-
-inline void Cuda::removeSmartPointer( Pointers::SmartPointer* pointer )
-{
-   getSmartPointersRegister().remove( pointer, Devices::CudaDeviceInfo::getActiveDevice() );
-}
-
-inline bool Cuda::synchronizeDevice( int deviceId )
-{
-#ifdef HAVE_CUDA
-#ifdef HAVE_CUDA_UNIFIED_MEMORY
-   return true;
-#else
-   if( deviceId < 0 )
-      deviceId = Devices::CudaDeviceInfo::getActiveDevice();
-   getSmartPointersSynchronizationTimer().start();
-   bool b = getSmartPointersRegister().synchronizeDevice( deviceId );
-   getSmartPointersSynchronizationTimer().stop();
-   return b;
-#endif
-#else
-   return true;
-#endif
-}
-
-inline Timer& Cuda::getSmartPointersSynchronizationTimer()
-{
-   static Timer timer;
-   return timer;
-}
-
-inline Pointers::SmartPointersRegister& Cuda::getSmartPointersRegister()
-{
-   static Pointers::SmartPointersRegister reg;
-   return reg;
-}
-
-#ifdef HAVE_CUDA
-namespace {
-   std::ostream& operator << ( std::ostream& str, const dim3& d )
-   {
-      str << "( " << d.x << ", " << d.y << ", " << d.z << " )";
-      return str;
-   }
-}
-#endif
-
-// double-precision atomicAdd function for Maxwell and older GPUs
-// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
-#ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-namespace {
-   __device__ double atomicAdd(double* address, double val)
-   {
-       unsigned long long int* address_as_ull =
-                                 (unsigned long long int*)address;
-       unsigned long long int old = *address_as_ull, assumed;
-
-       do {
-           assumed = old;
-           old = atomicCAS(address_as_ull, assumed,
-                           __double_as_longlong(val +
-                                  __longlong_as_double(assumed)));
-
-       // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-       } while (assumed != old);
-
-       return __longlong_as_double(old);
-   }
-} // namespace
-#endif
-#endif
-
-} // namespace Devices
-} // namespace TNL
diff --git a/src/TNL/Devices/Host.h b/src/TNL/Devices/Host.h
index 40f55711a817e684f379e442ab30cdec485be013..4af7892ecc2c6c8fe96ba936afe29a99cb023a0b 100644
--- a/src/TNL/Devices/Host.h
+++ b/src/TNL/Devices/Host.h
@@ -19,20 +19,11 @@
 #endif
 
 namespace TNL {
-//! \brief Namespace for TNL execution models
 namespace Devices {
 
 class Host
 {
 public:
-   static String getDeviceType()
-   {
-      return String( "Devices::Host" );
-   }
-
-   // TODO: Remove getDeviceType();
-   static inline String getType() { return getDeviceType();};
-
    static void disableOMP()
    {
       ompEnabled() = false;
diff --git a/src/TNL/Devices/MIC.h b/src/TNL/Devices/MIC.h
deleted file mode 100644
index f347a24d1f9e4fa6d5cceb7e2693807c7158065a..0000000000000000000000000000000000000000
--- a/src/TNL/Devices/MIC.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/***************************************************************************
-                          MIC.h  -  description
-                          -------------------
-    begin                : Nov 7, 2016
-    copyright            : (C) 2016 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Vit Hanousek
-
-#pragma once
-
-#include <iostream>
-#include <cstring>
-#include <unistd.h>
-#include <TNL/String.h>
-#include <TNL/Assert.h>
-#include <TNL/Pointers/SmartPointersRegister.h>
-#include <TNL/Timer.h>
-
-#include <TNL/Devices/CudaCallable.h>
-
-
-namespace TNL {
-namespace Devices {
-namespace {
-
-//useful macros from Intel's tutorials -- but we do not use it, becaouse it is tricky (system of maping variables CPU-MIC)
-#define ALLOC alloc_if(1) //alloac variable at begining of offloaded block -- default
-#define FREE free_if(1) // delete variable at the end of offloaded block -- default
-#define RETAIN free_if(0) //do not delete variable at the end of offladed block
-#define REUSE alloc_if(0) //do not alloc variable at begin of offloaded block, reuse variable on MIC which was not deleted befeore
-
-//structure which hides pointer - bypass mapping of variables and addresses of arrays and allow get RAW addres of MIC memory to RAM
-template< typename Type >
-struct MICHider{
-    Type *pointer;
-};
-
-//inflatable structure -- structures can be copied to MIC - classes not (viz paper published after CSJP 2016 in Krakow)
-//object can be copied in side this structure and then copied into MIC memory
-template <unsigned int VELIKOST>
-struct MICStruct{
-	uint8_t data[VELIKOST];
-};
-
-//Macros which can make code better readeble --but they are tricky, creating variables with specific names...
-//version using inflatable structure
-#define TNLMICSTRUCT(bb,typ) Devices::MICStruct<sizeof(typ)> s ## bb; \
-                             memcpy((void*)& s ## bb,(void*)& bb,sizeof(typ));
-#define TNLMICSTRUCTOFF(bb,typ) s ## bb
-#define TNLMICSTRUCTUSE(bb,typ) typ * kernel ## bb = (typ*) &s ## bb;
-#define TNLMICSTRUCTALLOC(bb,typ) typ * kernel ## bb = (typ*) malloc (sizeof(typ)); \
-                                memcpy((void*)kernel ## bb,(void*) & s ## bb, sizeof(typ));
-
-//version which retypes pointer of object to pointer to array of uint8_t,
-//object can be copied using uint8_t pointer as array with same length as object size
-#define TNLMICHIDE(bb,typ) uint8_t * u ## bb=(uint8_t *)&bb; \
-                           MICHider<typ> kernel ## bb;
-#define TNLMICHIDEALLOCOFF(bb,typ) in(u ## bb:length(sizeof(typ))) out(kernel ## bb)
-#define TNLMICHIDEALLOC(bb,typ) kernel ## bb.pointer=(typ*)malloc(sizeof(typ)); \
-                                memcpy((void*)kernel ## bb.pointer,(void*)u ## bb,sizeof(typ));
-#define TNLMICHIDEFREEOFF(bb,typ) in(kernel ## bb)
-#define TNLMICHIDEFREE(bb,typ) free((void*)kernel ## bb.pointer
-
-class MIC
-{
-   public:
-
-      static String getDeviceType()
-      {
-         return String( "Devices::MIC" );
-      };
-
-      // TODO: Remove getDeviceType();
-      static inline String getType() { return getDeviceType(); };
-        
-#ifdef HAVE_MIC
-
-       //useful debuging -- but produce warning
-       __cuda_callable__ static inline void CheckMIC(void)
-       {
-            #ifdef __MIC__
-                    std::cout<<"ON MIC"<<std::endl;
-            #else
-                    std::cout<<"ON CPU" <<std::endl;
-            #endif
-        };
-
-
-        //old copying funciton  -- deprecated
-        template <typename TYP>
-        static
-        TYP * passToDevice(TYP &objektCPU)
-        {
-                uint8_t * uk=(uint8_t *)&objektCPU;
-                MICHider<TYP> ret;
-
-                #pragma offload target(mic) in(uk:length(sizeof(TYP))) out(ret)
-                {
-                    ret.pointer=(TYP*)malloc(sizeof(TYP));
-                    std::memcpy((void*)ret.pointer,(void*)uk,sizeof(TYP));
-                }
-                return ret.pointer;
-
-                std::cout << "Někdo mně volá :-D" <<std::endl;
-        };
-
-        //old cleaning function -- deprecated
-        template <typename TYP>
-        static
-        void freeFromDevice(TYP *objektMIC)
-        {
-            MICHider<TYP> ptr;
-            ptr.pointer=objektMIC;
-            #pragma offload target(mic) in(ptr)
-            {
-                free((void*)ptr.pointer);
-            }
-        };
-
-        static inline
-        void CopyToMIC(void* mic_ptr,void* ptr,size_t size)
-        {
-            uint8_t image[size];
-            std::memcpy((void*)&image,ptr,size);
-            Devices::MICHider<void> hide_ptr;
-            hide_ptr.pointer=mic_ptr;
-            #pragma offload target(mic) in(hide_ptr) in(image) in(size)
-            {
-                std::memcpy((void*)hide_ptr.pointer,(void*)&image,size);
-            }
-        };
-
-#endif
-
-   static void insertSmartPointer( Pointers::SmartPointer* pointer )
-   {
-      smartPointersRegister.insert( pointer, -1 );
-   }
-
-   static void removeSmartPointer( Pointers::SmartPointer* pointer )
-   {
-      smartPointersRegister.remove( pointer, -1 );
-   }
-
-   // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be
-   // called to get the device ID.
-   static bool synchronizeDevice( int deviceId = -1 )
-   {
-      smartPointersSynchronizationTimer.start();
-      bool b = smartPointersRegister.synchronizeDevice( deviceId );
-      smartPointersSynchronizationTimer.stop();
-      return b;
-   }
-
-   static Timer smartPointersSynchronizationTimer;
-
-protected:
-   static Pointers::SmartPointersRegister smartPointersRegister;
-};
-
-Pointers::SmartPointersRegister MIC::smartPointersRegister;
-Timer MIC::smartPointersSynchronizationTimer;
-
-} // namespace <unnamed>
-} // namespace Devices
-} // namespace TNL
diff --git a/src/TNL/Devices/Sequential.h b/src/TNL/Devices/Sequential.h
new file mode 100644
index 0000000000000000000000000000000000000000..f00660f1961e5534db2bab2568a2a98e4fe7a622
--- /dev/null
+++ b/src/TNL/Devices/Sequential.h
@@ -0,0 +1,21 @@
+/***************************************************************************
+                          Sequential.h  -  description
+                             -------------------
+    begin                : Aug 17, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+//! \brief Namespace for TNL execution models
+namespace Devices {
+
+struct Sequential
+{};
+
+} // namespace Devices
+} // namespace TNL
diff --git a/src/TNL/Exceptions/MICBadAlloc.h b/src/TNL/Exceptions/MICBadAlloc.h
deleted file mode 100644
index b8f3a9157c54d8155652a42a700ad71a221aa201..0000000000000000000000000000000000000000
--- a/src/TNL/Exceptions/MICBadAlloc.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/***************************************************************************
-                          MICBadAlloc.h  -  description
-                             -------------------
-    begin                : Jul 31, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <new>
-
-namespace TNL {
-namespace Exceptions {
-
-struct MICBadAlloc
-   : public std::bad_alloc
-{
-   const char* what() const throw()
-   {
-      return "Failed to allocate memory on the MIC device: "
-             "most likely there is not enough space on the device memory.";
-   }
-};
-
-} // namespace Exceptions
-} // namespace TNL
diff --git a/src/TNL/Exceptions/MICSupportMissing.h b/src/TNL/Exceptions/MICSupportMissing.h
deleted file mode 100644
index 6d4260e6addbbb9dd89a7c9d5a07833485c6a0c2..0000000000000000000000000000000000000000
--- a/src/TNL/Exceptions/MICSupportMissing.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/***************************************************************************
-                          MICSupportMissing.h  -  description
-                             -------------------
-    begin                : Jul 31, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <stdexcept>
-
-namespace TNL {
-namespace Exceptions {
-
-struct MICSupportMissing
-   : public std::runtime_error
-{
-   MICSupportMissing()
-   : std::runtime_error( "MIC support is missing, but the program called a function which needs it. "
-                         "Please recompile the program with MIC support." )
-   {}
-};
-
-} // namespace Exceptions
-} // namespace TNL
diff --git a/src/TNL/Experimental/Arithmetics/Quad.h b/src/TNL/Experimental/Arithmetics/Quad.h
index 13d9c823155ad5fee670f61eb89486d56a08a7df..3c1dd073f54ccf8b258e64a73819b11a15fe3202 100644
--- a/src/TNL/Experimental/Arithmetics/Quad.h
+++ b/src/TNL/Experimental/Arithmetics/Quad.h
@@ -33,8 +33,6 @@ public:
     explicit Quad(const T&);
     explicit Quad(int);
     Quad(const Quad<T>&);
-    
-    static String getType();
 
     /*OVERLOADED OPERATORS*/
     T& operator[](int);
diff --git a/src/TNL/Experimental/Arithmetics/Quad_impl.h b/src/TNL/Experimental/Arithmetics/Quad_impl.h
index 63c08a40180d420d069bd4cb640ea7137ff1cb22..3a2ecb245061a28af50f552a8cb20790db89eba0 100644
--- a/src/TNL/Experimental/Arithmetics/Quad_impl.h
+++ b/src/TNL/Experimental/Arithmetics/Quad_impl.h
@@ -56,14 +56,6 @@ Quad<T>::Quad(const Quad<T>& other) {
     data[3] = other[3];
 }
 
-template <class T>
-String
-Quad< T >::
-getType()
-{
-   return String( "Quad< " + getType< T >() + " >" );
-}
-
 template <class T>
 T& Quad<T>::operator [](int idx) {
     return data[idx];
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h
index 55129c4e1008e69ef3b3d238acb8fc587cce4076..49cda643cc73a6717f47d877315b980e5589d60c 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h
@@ -22,10 +22,10 @@ initInterface( const MeshFunctionPointer& _input,
     const MeshType& mesh = _input->getMesh();
     
     const int cudaBlockSize( 16 );
-    int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
+    int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
     dim3 blockSize( cudaBlockSize );
     dim3 gridSize( numBlocksX );
-    Devices::Cuda::synchronizeDevice();
+    Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
     CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(),
             _output.template modifyData< Device >(),
             _interfaceMap.template modifyData< Device >() );
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h
index cddf4f9cb7a97f8a74eb94f7377b2cf740db03a5..b18252cb078b803d6e911ca130b9b161c241ff4d 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h
@@ -25,11 +25,11 @@ initInterface( const MeshFunctionPointer& _input,
     const MeshType& mesh = _input->getMesh();
     
     const int cudaBlockSize( 16 );
-    int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
-    int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
+    int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
+    int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
     dim3 blockSize( cudaBlockSize, cudaBlockSize );
     dim3 gridSize( numBlocksX, numBlocksY );
-    Devices::Cuda::synchronizeDevice();
+    Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
     CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(),
             _output.template modifyData< Device >(),
             _interfaceMap.template modifyData< Device >(),
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h
index 32548abcfe66affd71a79d3ed5f2f21d67df644e..fd7dc9381ec1325f108078271df343953be58ebd 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h
@@ -23,14 +23,14 @@ initInterface( const MeshFunctionPointer& _input,
     const MeshType& mesh = _input->getMesh();
     
     const int cudaBlockSize( 8 );
-    int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
-    int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
-    int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize );
+    int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
+    int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
+    int numBlocksZ = Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize );
     if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 )
       std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl;
     dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize );
     dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ );
-    Devices::Cuda::synchronizeDevice();
+    Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
     CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(),
             _output.template modifyData< Device >(),
             _interfaceMap.template modifyData< Device >(), vLower, vUpper );
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h
index f2f033ccbee3ffa5b71567ea1b54e2307ebe1713..52c2ebbee3f16fd39a2dfab738ac9bea6ffaf393 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h
@@ -105,7 +105,7 @@ solve( const MeshPointer& mesh,
          // TODO: CUDA code
 #ifdef HAVE_CUDA
           const int cudaBlockSize( 16 );
-          int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
+          int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
           dim3 blockSize( cudaBlockSize );
           dim3 gridSize( numBlocksX );
           
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
index e5638c11dd71d88d72d8d0590c8e51c0df6baaab..1b1666a02b627778bd8364a7e46f5e22718ff76f 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
@@ -251,8 +251,8 @@ solve( const MeshPointer& mesh,
         const int cudaBlockSize( 16 );
         
         // Setting number of threads and blocks for kernel
-        int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize );
-        int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize );
+        int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize );
+        int numBlocksY = Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize );
         dim3 blockSize( cudaBlockSize, cudaBlockSize );
         dim3 gridSize( numBlocksX, numBlocksY );
         
@@ -316,7 +316,7 @@ solve( const MeshPointer& mesh,
           
           
   /** HERE IS FIM FOR MPI AND WITHOUT MPI **/
-          Devices::Cuda::synchronizeDevice();
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
           CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(),
                   auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(),
                   blockCalculationIndicator.getView(), vecLowerOverlaps, vecUpperOverlaps );
@@ -327,7 +327,7 @@ solve( const MeshPointer& mesh,
           auxPtr.swap( helpFunc );
           
           // Getting blocks that should calculate in next passage. These blocks are neighbours of those that were calculated now.
-          Devices::Cuda::synchronizeDevice(); 
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
           GetNeighbours<<< nBlocksNeigh, 1024 >>>( blockCalculationIndicator.getView(), blockCalculationIndicatorHelp.getView(), numBlocksX, numBlocksY );
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
@@ -349,7 +349,7 @@ solve( const MeshPointer& mesh,
         if( numIter%2 == 1 ) // Need to check parity for MPI overlaps to synchronize ( otherwise doesnt work )
         {
           helpFunc.swap( auxPtr );
-          Devices::Cuda::synchronizeDevice();
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
         }
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h
index 325b626f7bf5262637f8e1b43ec9e156bbeca26b..82185a937d832b0785b597188aeb0989ab751d47 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h
@@ -263,9 +263,9 @@ solve( const MeshPointer& mesh,
         const int cudaBlockSize( 8 );
         
         // Getting the number of blocks in grid in each direction (without overlaps bcs we dont calculate on overlaps)
-        int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize );
-        int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize );
-        int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z() - vecLowerOverlaps[2] - vecUpperOverlaps[2], cudaBlockSize ); 
+        int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize );
+        int numBlocksY = Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize );
+        int numBlocksZ = Cuda::getNumberOfBlocks( mesh->getDimensions().z() - vecLowerOverlaps[2] - vecUpperOverlaps[2], cudaBlockSize ); 
         if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 )
           std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl;
         
@@ -295,14 +295,14 @@ solve( const MeshPointer& mesh,
         //MeshFunctionPointer helpFunc1( mesh );      
         MeshFunctionPointer helpFunc( mesh );
         helpFunc.template modifyData() = auxPtr.template getData();
-        Devices::Cuda::synchronizeDevice(); 
+        Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
                 
         int numIter = 0; // number of passages of following while cycle
         
         while( BlockIterD ) //main body of cuda code
         {
           
-          Devices::Cuda::synchronizeDevice();          
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
           // main function that calculates all values in each blocks
           // calculated values are in helpFunc
           CudaUpdateCellCaller< 10 ><<< gridSize, blockSize >>>( ptr,
@@ -315,14 +315,14 @@ solve( const MeshPointer& mesh,
           // Switching pointers to helpFunc and auxPtr so real results are in memory of helpFunc but here under variable auxPtr
           auxPtr.swap( helpFunc );
           
-          Devices::Cuda::synchronizeDevice();
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
           // Neighbours of blocks that calculatedBefore in this passage should calculate in the next!
           // BlockIterDevice contains blocks that calculatedBefore in this passage and BlockIterPom those that should calculate in next (are neighbours)
           GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ );
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
           BlockIterDevice = BlockIterPom;
-          Devices::Cuda::synchronizeDevice();
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
           
           // .containsValue(1) is actually parallel reduction implemented in TNL
           BlockIterD = BlockIterDevice.containsValue(1);
@@ -340,7 +340,7 @@ solve( const MeshPointer& mesh,
           // We need auxPtr to point on memory of original auxPtr (not to helpFunc)
           // last passage of previous while cycle didnt calculate any number anyway so switching names doesnt effect values
           auxPtr.swap( helpFunc ); 
-          Devices::Cuda::synchronizeDevice();
+          Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
         }
         cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
diff --git a/src/TNL/File.h b/src/TNL/File.h
index 1aa5615e5cbbf8f36d2c9ac3d98bdbc3ba4ada03..cef110e1633537ecb5b13770cc805dccb1e2786f 100644
--- a/src/TNL/File.h
+++ b/src/TNL/File.h
@@ -14,9 +14,8 @@
 #include <type_traits>
 
 #include <TNL/String.h>
-#include <TNL/Devices/Host.h>
-#include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
+#include <TNL/Allocators/Host.h>
+#include <TNL/Allocators/Cuda.h>
 
 namespace TNL {
 
@@ -86,9 +85,9 @@ class File
       /**
        * \brief Method for loading data from the file.
        *
-       * The data will be stored in \e buffer allocated on device given by the
-       * \e Device parameter. The data type of the buffer is given by the
-       * template parameter \e Type. The second template parameter 
+       * The data will be stored in \e buffer which was allocated using the
+       * allocator of type \e Allocator. The data type of the buffer is given
+       * by the template parameter \e Type. The second template parameter
        * \e SourceType defines the type of data in the source file. If both
        * types are different, on-the-fly conversion takes place during the
        * data loading.
@@ -97,31 +96,31 @@ class File
        *
        * \tparam Type type of data to be loaded to the \e buffer.
        * \tparam SourceType type of data stored on the file,
-       * \tparam Device device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda.
+       * \tparam Allocator type of the allocator which was used to allocate \e buffer.
        * \param buffer Pointer in memory where the elements are loaded and stored after reading.
        * \param elements number of elements to be loaded from the file.
-       * 
+       *
        * The following example shows how to load data directly to GPU.
-       * 
+       *
        * \par Example
        * \include FileExampleCuda.cpp
        * \par Output
        * \include FileExampleCuda.out
        * The following example shows how to do on-the-fly data conversion.
-       * 
+       *
        * \par Example
        * \include FileExampleSaveAndLoad.cpp
        * \par Output
        * \include FileExampleSaveAndLoad.out
        */
-      template< typename Type, typename SourceType = Type, typename Device = Devices::Host >
+      template< typename Type, typename SourceType = Type, typename Allocator = Allocators::Host< Type > >
       void load( Type* buffer, std::streamsize elements = 1 );
 
       /**
        * \brief Method for saving data to the file.
        *
-       * The data from the \e buffer (with type \e Type) allocated on the device
-       * \e Device will be saved into the file. \e TargetType defines as what
+       * The data from the \e buffer (with type \e Type), allocated using an
+       * allocator of type \e Allocator, will be saved into the file. \e TargetType defines as what
        * data type the buffer shall be saved. If the type is different from the
        * data type, on-the-fly data type conversion takes place during the data
        * saving.
@@ -130,69 +129,49 @@ class File
        *
        * \tparam Type type of data in the \e buffer.
        * \tparam TargetType tells as what type data the buffer shall be saved.
-       * \tparam Device device from where the data are loaded before writing into file. For example \ref Devices::Host or \ref Devices::Cuda.
+       * \tparam Allocator type of the allocator which was used to allocate \e buffer.
        * \tparam Index type of index by which the elements are indexed.
        * \param buffer buffer that is going to be saved to the file.
        * \param elements number of elements saved to the file.
        * 
        * See \ref File::load for examples.
        */
-      template< typename Type, typename TargetType = Type, typename Device = Devices::Host >
+      template< typename Type, typename TargetType = Type, typename Allocator = Allocators::Host< Type > >
       void save( const Type* buffer, std::streamsize elements = 1 );
 
    protected:
+      // implementation for all allocators which allocate data accessible from host
       template< typename Type,
                 typename SourceType,
-                typename Device,
-                typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type >
+                typename Allocator,
+                typename = std::enable_if_t< ! std::is_same< Allocator, Allocators::Cuda< Type > >::value > >
       void load_impl( Type* buffer, std::streamsize elements );
 
+      // implementation for \ref Allocators::Cuda
       template< typename Type,
                 typename SourceType,
-                typename Device,
-                typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type,
+                typename Allocator,
+                typename = std::enable_if_t< std::is_same< Allocator, Allocators::Cuda< Type > >::value >,
                 typename = void >
       void load_impl( Type* buffer, std::streamsize elements );
 
-      template< typename Type,
-                typename SourceType,
-                typename Device,
-                typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
-                typename = void,
-                typename = void >
-      void load_impl( Type* buffer, std::streamsize elements );
-
-      template< typename Type,
-                typename TargetType,
-                typename Device,
-                typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type >
-      void save_impl( const Type* buffer, std::streamsize elements );
-
+      // implementation for all allocators which allocate data accessible from host
       template< typename Type,
                 typename TargetType,
-                typename Device,
-                typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type,
-                typename = void >
+                typename Allocator,
+                typename = std::enable_if_t< ! std::is_same< Allocator, Allocators::Cuda< Type > >::value > >
       void save_impl( const Type* buffer, std::streamsize elements );
 
+      // implementation for \ref Allocators::Cuda
       template< typename Type,
                 typename TargetType,
-                typename Device,
-                typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
-                typename = void,
+                typename Allocator,
+                typename = std::enable_if_t< std::is_same< Allocator, Allocators::Cuda< Type > >::value >,
                 typename = void >
       void save_impl( const Type* buffer, std::streamsize elements );
 
       std::fstream file;
       String fileName;
-
-      ////
-      // When we transfer data between the GPU and the CPU we use 5 MB buffer. This
-      // size should ensure good performance -- see.
-      // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
-      // We use the same buffer size even for retyping data during IO operations.
-      //
-      static constexpr std::streamsize TransferBufferSize = 5 * 2<<20;
 };
 
 /**
diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp
index f4edd2b9638e0331da973bdf16de06568d4b7c23..af112e992a7640070ab880192688b3a0aac8f1d2 100644
--- a/src/TNL/File.hpp
+++ b/src/TNL/File.hpp
@@ -17,8 +17,9 @@
 
 #include <TNL/File.h>
 #include <TNL/Assert.h>
+#include <TNL/Cuda/CheckDevice.h>
+#include <TNL/Cuda/LaunchHelpers.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
-#include <TNL/Exceptions/MICSupportMissing.h>
 #include <TNL/Exceptions/FileSerializationError.h>
 #include <TNL/Exceptions/FileDeserializationError.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -79,21 +80,23 @@ inline void File::close()
 
 template< typename Type,
           typename SourceType,
-          typename Device >
+          typename Allocator >
 void File::load( Type* buffer, std::streamsize elements )
 {
+   static_assert( std::is_same< Type, typename Allocator::value_type >::value,
+                  "Allocator::value_type must be the same as Type." );
    TNL_ASSERT_GE( elements, 0, "Number of elements to load must be non-negative." );
 
    if( ! elements )
       return;
 
-   load_impl< Type, SourceType, Device >( buffer, elements );
+   load_impl< Type, SourceType, Allocator >( buffer, elements );
 }
 
-// Host
+// Host allocators
 template< typename Type,
           typename SourceType,
-          typename Device,
+          typename Allocator,
           typename >
 void File::load_impl( Type* buffer, std::streamsize elements )
 {
@@ -101,7 +104,7 @@ void File::load_impl( Type* buffer, std::streamsize elements )
       file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements );
    else
    {
-      const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements );
+      const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements );
       using BaseType = typename std::remove_cv< SourceType >::type;
       std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
       std::streamsize readElements = 0;
@@ -116,15 +119,15 @@ void File::load_impl( Type* buffer, std::streamsize elements )
    }
 }
 
-// Cuda
+// Allocators::Cuda
 template< typename Type,
           typename SourceType,
-          typename Device,
+          typename Allocator,
           typename, typename >
 void File::load_impl( Type* buffer, std::streamsize elements )
 {
 #ifdef HAVE_CUDA
-   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
+   const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements );
    using BaseType = typename std::remove_cv< Type >::type;
    std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
 
@@ -145,7 +148,7 @@ void File::load_impl( Type* buffer, std::streamsize elements )
    }
    else
    {
-      const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements );
+      const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements );
       using BaseType = typename std::remove_cv< SourceType >::type;
       std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
 
@@ -168,65 +171,25 @@ void File::load_impl( Type* buffer, std::streamsize elements )
 #endif
 }
 
-// MIC
-template< typename Type,
-          typename SourceType,
-          typename Device,
-          typename, typename, typename >
-void File::load_impl( Type* buffer, std::streamsize elements )
-{
-#ifdef HAVE_MIC
-   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
-   using BaseType = typename std::remove_cv< Type >::type;
-   std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
-
-   std::streamsize readElements = 0;
-   if( std::is_same< Type, SourceType >::value )
-   {
-      while( readElements < elements )
-      {
-         const std::streamsize transfer = std::min( elements - readElements, host_buffer_size );
-         file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer );
-
-         Devices::MICHider<Type> device_buff;
-         device_buff.pointer=buffer;
-         #pragma offload target(mic) in(device_buff,readElements) in(host_buffer:length(transfer))
-         {
-            /*
-            for(int i=0;i<transfer;i++)
-                 device_buff.pointer[readElements+i]=host_buffer[i];
-             */
-            memcpy(&(device_buff.pointer[readElements]), host_buffer.get(), transfer*sizeof(Type) );
-         }
-
-         readElements += transfer;
-      }
-      free( host_buffer );
-   }
-   else
-      throw Exceptions::NotImplementedError("Type conversion during loading is not implemented for MIC.");
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
 template< typename Type,
           typename TargetType,
-          typename Device >
+          typename Allocator >
 void File::save( const Type* buffer, std::streamsize elements )
 {
+   static_assert( std::is_same< Type, typename Allocator::value_type >::value,
+                  "Allocator::value_type must be the same as Type." );
    TNL_ASSERT_GE( elements, 0, "Number of elements to save must be non-negative." );
 
    if( ! elements )
       return;
 
-   save_impl< Type, TargetType, Device >( buffer, elements );
+   save_impl< Type, TargetType, Allocator >( buffer, elements );
 }
 
-// Host
+// Host allocators
 template< typename Type,
           typename TargetType,
-          typename Device,
+          typename Allocator,
           typename >
 void File::save_impl( const Type* buffer, std::streamsize elements )
 {
@@ -234,7 +197,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements )
       file.write( reinterpret_cast<const char*>(buffer), sizeof(Type) * elements );
    else
    {
-      const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements );
+      const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements );
       using BaseType = typename std::remove_cv< TargetType >::type;
       std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
       std::streamsize writtenElements = 0;
@@ -250,15 +213,15 @@ void File::save_impl( const Type* buffer, std::streamsize elements )
    }
 }
 
-// Cuda
+// Allocators::Cuda
 template< typename Type,
           typename TargetType,
-          typename Device,
+          typename Allocator,
           typename, typename >
 void File::save_impl( const Type* buffer, std::streamsize elements )
 {
 #ifdef HAVE_CUDA
-   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
+   const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements );
    using BaseType = typename std::remove_cv< Type >::type;
    std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
 
@@ -279,7 +242,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements )
    }
    else
    {
-      const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements );
+      const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements );
       using BaseType = typename std::remove_cv< TargetType >::type;
       std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
 
@@ -303,48 +266,6 @@ void File::save_impl( const Type* buffer, std::streamsize elements )
 #endif
 }
 
-// MIC
-template< typename Type,
-          typename TargetType,
-          typename Device,
-          typename, typename, typename >
-void File::save_impl( const Type* buffer, std::streamsize elements )
-{
-#ifdef HAVE_MIC
-   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
-   using BaseType = typename std::remove_cv< Type >::type;
-   std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
-
-   std::streamsize writtenElements = 0;
-   if( std::is_same< Type, TargetType >::value )
-   {
-      while( this->writtenElements < elements )
-      {
-         const std::streamsize transfer = std::min( elements - writtenElements, host_buffer_size );
-
-         Devices::MICHider<const Type> device_buff;
-         device_buff.pointer=buffer;
-         #pragma offload target(mic) in(device_buff,writtenElements) out(host_buffer:length(transfer))
-         {
-            //THIS SHOULD WORK... BUT NOT WHY?
-            /*for(int i=0;i<transfer;i++)
-                 host_buffer[i]=device_buff.pointer[writtenElements+i];
-             */
-
-            memcpy(host_buffer.get(), &(device_buff.pointer[writtenElements]), transfer*sizeof(Type) );
-         }
-
-         file.write( reinterpret_cast<const char*>(host_buffer.get()), sizeof(Type) * transfer );
-         writtenElements += transfer;
-      }
-   }
-   else
-      throw Exceptions::NotImplementedError("Type conversion during saving is not implemented for MIC.");
-#else
-   throw Exceptions::MICSupportMissing();
-#endif
-}
-
 inline bool fileExists( const String& fileName )
 {
    std::fstream file;
diff --git a/src/TNL/Functions/Analytic/Blob.h b/src/TNL/Functions/Analytic/Blob.h
index e12a27393c7077f71fe57137a6fce3a2abc00d0c..5a95257cdf0e0a78d071ca6b078c8f9ec2d687ed 100644
--- a/src/TNL/Functions/Analytic/Blob.h
+++ b/src/TNL/Functions/Analytic/Blob.h
@@ -50,8 +50,6 @@ class Blob< 1, Real > : public BlobBase< Real, 1 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Blob();
 
       template< int XDiffOrder = 0,
@@ -75,8 +73,6 @@ class Blob< 2, Real > : public BlobBase< Real, 2 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Blob();
 
       template< int XDiffOrder = 0,
@@ -101,8 +97,6 @@ class Blob< 3, Real > : public BlobBase< Real, 3 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Blob();
 
       template< int XDiffOrder = 0,
diff --git a/src/TNL/Functions/Analytic/Blob_impl.h b/src/TNL/Functions/Analytic/Blob_impl.h
index f615a10dd7826decd3ec2432e29e96a54371824e..f5195f758e979441a81e7dcfbcf2431705a6f8bd 100644
--- a/src/TNL/Functions/Analytic/Blob_impl.h
+++ b/src/TNL/Functions/Analytic/Blob_impl.h
@@ -32,13 +32,6 @@ setup( const Config::ParameterContainer& parameters,
  * 1D
  */
 
-template< typename Real >
-String
-Blob< 1, Real >::getType()
-{
-   return "Functions::Analytic::Blob< 1, " + TNL::getType< Real >() + String( " >" );
-}
-
 template< typename Real >
 Blob< 1, Real >::Blob()
 {
@@ -75,13 +68,6 @@ operator()( const PointType& v,
 /****
  * 2D
  */
-template< typename Real >
-String
-Blob< 2, Real >::getType()
-{
-   return String( "Functions::Analytic::Blob< 2, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Blob< 2, Real >::Blob()
 {
@@ -119,13 +105,6 @@ operator()( const PointType& v,
 /****
  * 3D
  */
-template< typename Real >
-String
-Blob< 3, Real >::getType()
-{
-   return String( "Functions::Analytic::Blob< 3, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Blob< 3, Real >::Blob()
 {
diff --git a/src/TNL/Functions/Analytic/Cylinder.h b/src/TNL/Functions/Analytic/Cylinder.h
index fb3f0542ceda6b3c1b334c300419549d5217bf2b..8b8ab198319a413d9cf95c9e92d1dcb23bc777c6 100644
--- a/src/TNL/Functions/Analytic/Cylinder.h
+++ b/src/TNL/Functions/Analytic/Cylinder.h
@@ -54,8 +54,6 @@ class Cylinder< 1, Real > : public CylinderBase< Real, 1 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Cylinder();
 
       template< int XDiffOrder = 0,
@@ -81,8 +79,6 @@ class Cylinder< 2, Real > : public CylinderBase< Real, 2 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Cylinder();
 
       template< int XDiffOrder = 0,
@@ -108,8 +104,6 @@ class Cylinder< 3, Real > : public CylinderBase< Real, 3 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Cylinder();
 
       template< int XDiffOrder = 0,
diff --git a/src/TNL/Functions/Analytic/Cylinder_impl.h b/src/TNL/Functions/Analytic/Cylinder_impl.h
index b0698bca32056610195f5a9ab23c3e603455e1e5..b76286580c8c13a13c5ffa56fa29ef7943da1fe0 100644
--- a/src/TNL/Functions/Analytic/Cylinder_impl.h
+++ b/src/TNL/Functions/Analytic/Cylinder_impl.h
@@ -47,13 +47,6 @@ const Real& CylinderBase< Real, Dimension >::getDiameter() const
  * 1D
  */
 
-template< typename Real >
-String
-Cylinder< 1, Real >::getType()
-{
-   return "Functions::Analytic::Cylinder< 1, " + TNL::getType< Real >() + String( " >" );
-}
-
 template< typename Real >
 Cylinder< 1, Real >::Cylinder()
 {
@@ -91,13 +84,6 @@ operator()( const PointType& v,
  * 2D
  */
 
-template< typename Real >
-String
-Cylinder< 2, Real >::getType()
-{
-   return String( "Functions::Analytic::Cylinder< 2, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Cylinder< 2, Real >::Cylinder()
 {
@@ -137,14 +123,6 @@ operator()( const PointType& v,
 /****
  * 3D
  */
-
-template< typename Real >
-String
-Cylinder< 3, Real >::getType()
-{
-   return String( "Functions::Analytic::Cylinder< 3, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Cylinder< 3, Real >::Cylinder()
 {
diff --git a/src/TNL/Functions/Analytic/ExpBump.h b/src/TNL/Functions/Analytic/ExpBump.h
index 36b07c9a27d549532fb055c01c075cf5e30aa8a8..48fc613d9b40278140d08ffd0285097dbd7fd326 100644
--- a/src/TNL/Functions/Analytic/ExpBump.h
+++ b/src/TNL/Functions/Analytic/ExpBump.h
@@ -58,8 +58,6 @@ class ExpBump< 1, Real > : public ExpBumpBase< 1, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 1, RealType > PointType;
 
-      static String getType();
-
       ExpBump();
 
       template< int XDiffOrder = 0,
@@ -82,8 +80,6 @@ class ExpBump< 2, Real > : public ExpBumpBase< 2, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 2, RealType > PointType;
 
-      static String getType();
-
       ExpBump();
 
       template< int XDiffOrder = 0,
@@ -106,9 +102,6 @@ class ExpBump< 3, Real > : public ExpBumpBase< 3, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 3, RealType > PointType;
 
- 
-      static String getType();
-
       ExpBump();
 
       template< int XDiffOrder = 0,
diff --git a/src/TNL/Functions/Analytic/ExpBump_impl.h b/src/TNL/Functions/Analytic/ExpBump_impl.h
index 54ecbe2a66fb011e827c1c876aa385ad4b4eee57..6c1103f02ae918cdf35563ccc86480d045d23598 100644
--- a/src/TNL/Functions/Analytic/ExpBump_impl.h
+++ b/src/TNL/Functions/Analytic/ExpBump_impl.h
@@ -63,13 +63,6 @@ const Real& ExpBumpBase< dimensions, Real >::getSigma() const
  * 1D
  */
 
-template< typename Real >
-String
-ExpBump< 1, Real >::getType()
-{
-   return "Functions::Analytic::ExpBump< 1, " + TNL::getType< Real >() + String( " >" );
-}
-
 template< typename Real >
 ExpBump< 1, Real >::ExpBump()
 {
@@ -113,13 +106,6 @@ operator()( const PointType& v,
  * 2D
  */
 
-template< typename Real >
-String
-ExpBump< 2, Real >::getType()
-{
-   return String( "Functions::Analytic::ExpBump< 2, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 ExpBump< 2, Real >::ExpBump()
 {
@@ -168,13 +154,6 @@ operator()( const PointType& v,
  * 3D
  */
 
-template< typename Real >
-String
-ExpBump< 3, Real >::getType()
-{
-   return String( "Functions::Analytic::ExpBump< 3, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 ExpBump< 3, Real >::ExpBump()
 {
diff --git a/src/TNL/Functions/Analytic/Flowerpot.h b/src/TNL/Functions/Analytic/Flowerpot.h
index 5a42c5f94249aa69c320959a9713f13f88beec56..f33d32b1899fc9df026e10cd34a841e8b3d32023 100644
--- a/src/TNL/Functions/Analytic/Flowerpot.h
+++ b/src/TNL/Functions/Analytic/Flowerpot.h
@@ -54,8 +54,6 @@ class Flowerpot< 1, Real > : public FlowerpotBase< Real, 1 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Flowerpot();
 
       template< int XDiffOrder = 0,
@@ -81,8 +79,6 @@ class Flowerpot< 2, Real > : public FlowerpotBase< Real, 2 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Flowerpot();
 
       template< int XDiffOrder = 0,
@@ -108,8 +104,6 @@ class Flowerpot< 3, Real > : public FlowerpotBase< Real, 3 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Flowerpot();
 
       template< int XDiffOrder = 0,
diff --git a/src/TNL/Functions/Analytic/Flowerpot_impl.h b/src/TNL/Functions/Analytic/Flowerpot_impl.h
index 455b4682b29780bdf526b45adf9b228c7c225073..6769a794b2e7882706d3c7f1f2c3ed11ba655a83 100644
--- a/src/TNL/Functions/Analytic/Flowerpot_impl.h
+++ b/src/TNL/Functions/Analytic/Flowerpot_impl.h
@@ -45,13 +45,6 @@ const Real& FlowerpotBase< Real, Dimension >::getDiameter() const
  * 1D
  */
 
-template< typename Real >
-String
-Flowerpot< 1, Real >::getType()
-{
-   return "Functions::Analytic::Flowerpot< 1, " + TNL::getType< Real >() + String( " >" );
-}
-
 template< typename Real >
 Flowerpot< 1, Real >::Flowerpot()
 {
@@ -89,13 +82,6 @@ operator()( const PointType& v,
 /****
  * 2D
  */
-template< typename Real >
-String
-Flowerpot< 2, Real >::getType()
-{
-   return String( "Functions::Analytic::Flowerpot< 2, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Flowerpot< 2, Real >::Flowerpot()
 {
@@ -136,13 +122,6 @@ operator()( const PointType& v,
  * 3D
  */
 
-template< typename Real >
-String
-Flowerpot< 3, Real >::getType()
-{
-   return String( "Functions::Analytic::Flowerpot< 3, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Flowerpot< 3, Real >::Flowerpot()
 {
diff --git a/src/TNL/Functions/Analytic/PseudoSquare.h b/src/TNL/Functions/Analytic/PseudoSquare.h
index ea4a5ae84e7e306560e67c74f40075cd3cc5a883..1139f6ed83462e1fef9c72a749332f05f39410d5 100644
--- a/src/TNL/Functions/Analytic/PseudoSquare.h
+++ b/src/TNL/Functions/Analytic/PseudoSquare.h
@@ -50,8 +50,6 @@ class PseudoSquare< 1, Real > : public PseudoSquareBase< Real, 1 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       PseudoSquare();
 
       template< int XDiffOrder = 0,
@@ -75,8 +73,6 @@ class PseudoSquare< 2, Real > : public PseudoSquareBase< Real, 2 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       PseudoSquare();
 
       template< int XDiffOrder = 0,
@@ -100,8 +96,6 @@ class PseudoSquare< 3, Real > : public PseudoSquareBase< Real, 3 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       PseudoSquare();
 
       template< int XDiffOrder = 0,
diff --git a/src/TNL/Functions/Analytic/PseudoSquare_impl.h b/src/TNL/Functions/Analytic/PseudoSquare_impl.h
index 5da33707a43307bf4b343384e924f15b5a1518b6..18edb0d347709369a750c3114dc1884a912b9d84 100644
--- a/src/TNL/Functions/Analytic/PseudoSquare_impl.h
+++ b/src/TNL/Functions/Analytic/PseudoSquare_impl.h
@@ -33,13 +33,6 @@ setup( const Config::ParameterContainer& parameters,
  * 1D
  */
 
-template< typename Real >
-String
-PseudoSquare< 1, Real >::getType()
-{
-   return "Functions::Analytic::PseudoSquare< 1, " + TNL::getType< Real >() + String( " >" );
-}
-
 template< typename Real >
 PseudoSquare< 1, Real >::PseudoSquare()
 {
@@ -76,13 +69,6 @@ operator()( const PointType& v,
 /****
  * 2D
  */
-template< typename Real >
-String
-PseudoSquare< 2, Real >::getType()
-{
-   return String( "Functions::Analytic::PseudoSquare< 2, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 PseudoSquare< 2, Real >::PseudoSquare()
 {
@@ -120,13 +106,6 @@ operator()( const PointType& v,
 /****
  * 3D
  */
-template< typename Real >
-String
-PseudoSquare< 3, Real >::getType()
-{
-   return String( "Functions::Analytic::PseudoSquare< 3, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 PseudoSquare< 3, Real >::PseudoSquare()
 {
diff --git a/src/TNL/Functions/Analytic/Twins.h b/src/TNL/Functions/Analytic/Twins.h
index c882ec4eb133c326195151b9d6db07098bec3735..775caf391c2b0f37dbabefbb48a07c80fdcea69d 100644
--- a/src/TNL/Functions/Analytic/Twins.h
+++ b/src/TNL/Functions/Analytic/Twins.h
@@ -46,8 +46,6 @@ class Twins< 1, Real > : public TwinsBase< Real, 1 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Twins();
 
       template< int XDiffOrder = 0,
@@ -73,8 +71,6 @@ class Twins< 2, Real > : public TwinsBase< Real, 2 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Twins();
 
       template< int XDiffOrder = 0,
@@ -100,8 +96,6 @@ class Twins< 3, Real > : public TwinsBase< Real, 3 >
       typedef Real RealType;
       typedef Containers::StaticVector< Dimension, Real > PointType;
 
-      static String getType();
-
       Twins();
 
       template< int XDiffOrder = 0,
diff --git a/src/TNL/Functions/Analytic/Twins_impl.h b/src/TNL/Functions/Analytic/Twins_impl.h
index 9e1cd81c185748cce2f038e0c55f154157056751..7b2ce41c7c2543a20d7b3750f406be241fca490c 100644
--- a/src/TNL/Functions/Analytic/Twins_impl.h
+++ b/src/TNL/Functions/Analytic/Twins_impl.h
@@ -31,13 +31,6 @@ setup( const Config::ParameterContainer& parameters,
  * 1D
  */
 
-template< typename Real >
-String
-Twins< 1, Real >::getType()
-{
-   return "Functions::Analytic::Twins< 1, " + TNL::getType< Real >() + String( " >" );
-}
-
 template< typename Real >
 Twins< 1, Real >::Twins()
 {
@@ -75,13 +68,6 @@ operator()( const PointType& v,
 /****
  * 2D
  */
-template< typename Real >
-String
-Twins< 2, Real >::getType()
-{
-   return String( "Functions::Analytic::Twins< 2, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Twins< 2, Real >::Twins()
 {
@@ -121,13 +107,6 @@ operator()( const PointType& v,
 /****
  * 3D
  */
-template< typename Real >
-String
-Twins< 3, Real >::getType()
-{
-   return String( "Functions::Analytic::Twins< 3, " ) + TNL::getType< Real >() + " >";
-}
-
 template< typename Real >
 Twins< 3, Real >::Twins()
 {
diff --git a/src/TNL/Functions/Analytic/VectorNorm.h b/src/TNL/Functions/Analytic/VectorNorm.h
index a9d292c5f625a22e15115fe6a69d1f5903525a91..583f3eebcb22fa815fa2624dbd73de3073498c73 100644
--- a/src/TNL/Functions/Analytic/VectorNorm.h
+++ b/src/TNL/Functions/Analytic/VectorNorm.h
@@ -144,8 +144,6 @@ class VectorNorm< 1, Real > : public VectorNormBase< 1, Real >
       using typename BaseType::RealType;
       using typename BaseType::PointType;
 
-      static String getType();
-
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
@@ -184,8 +182,6 @@ class VectorNorm< 2, Real > : public VectorNormBase< 2, Real >
       using typename BaseType::RealType;
       using typename BaseType::PointType;
 
-      static String getType();
-
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
@@ -232,8 +228,6 @@ class VectorNorm< 3, Real > : public VectorNormBase< 3, Real >
       using typename BaseType::RealType;
       using typename BaseType::PointType;
 
-      static String getType();
-
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
diff --git a/src/TNL/Functions/CutMeshFunction.h b/src/TNL/Functions/CutMeshFunction.h
index 4cad00d1b9f87a8c241422c2b29f4acd36d8458e..e727b15fcd92313ebc1010cebe32f5b4a74b35d2 100644
--- a/src/TNL/Functions/CutMeshFunction.h
+++ b/src/TNL/Functions/CutMeshFunction.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Functions/MeshFunction.h>
-#include <TNL/StaticVectorFor.h>
+#include <TNL/Algorithms/StaticVectorFor.h>
 #include <TNL/Containers/StaticVector.h>
 
 namespace TNL {
@@ -101,7 +101,7 @@ class CutMeshFunction
 
             typename OutMesh::CoordinatesType starts;
             starts.setValue(0);
-            StaticVectorFor::exec(starts,outMesh.getDimensions(),kernel);
+            Algorithms::StaticVectorFor::exec(starts,outMesh.getDimensions(),kernel);
         }
 
         return inCut;
diff --git a/src/TNL/Functions/FunctionAdapter.h b/src/TNL/Functions/FunctionAdapter.h
index b9c35886689bf254eccfd4154469be71462107d3..b763ee47631d10522038b8dad9f927e83bc37a88 100644
--- a/src/TNL/Functions/FunctionAdapter.h
+++ b/src/TNL/Functions/FunctionAdapter.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Functions/Domain.h>
 
diff --git a/src/TNL/Functions/MeshFunction.h b/src/TNL/Functions/MeshFunction.h
index f7d6749c9ab1bd7a8a850da97fc2b7d344f43dcd..2b7069c0fa45808d2e04ddf09709fe310cfdb18f 100644
--- a/src/TNL/Functions/MeshFunction.h
+++ b/src/TNL/Functions/MeshFunction.h
@@ -60,10 +60,6 @@ class MeshFunction :
                     Pointers::SharedPointer<  Vector >& data,
                     const IndexType& offset = 0 );
 
-      static String getType();
-
-      String getTypeVirtual() const;
-
       static String getSerializationType();
 
       virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Functions/MeshFunctionGnuplotWriter.h b/src/TNL/Functions/MeshFunctionGnuplotWriter.h
index d747e84a75d6c75d4029b49761ad17efdaf72368..244146ff6d9eea06068f7ac1b61379236fac7e02 100644
--- a/src/TNL/Functions/MeshFunctionGnuplotWriter.h
+++ b/src/TNL/Functions/MeshFunctionGnuplotWriter.h
@@ -68,11 +68,10 @@ template< typename MeshFunction,
 class MeshFunctionGnuplotWriter
 : public MeshFunctionGnuplotWriterBase
 {
-   public:
-
-      using MeshType = typename MeshFunction::MeshType;
-      using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >;
-      using GlobalIndex = typename MeshType::GlobalIndexType;
+public:
+   using MeshType = typename MeshFunction::MeshType;
+   using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >;
+   using GlobalIndex = typename MeshType::GlobalIndexType;
 
    static bool write( const MeshFunction& function,
                       std::ostream& str,
@@ -99,11 +98,10 @@ template< typename MeshFunction,
 class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 2, Real, Device, Index >, EntityDimension >
 : public MeshFunctionGnuplotWriterBase
 {
-   public:
-
-      using MeshType = typename MeshFunction::MeshType;
-      using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >;
-      using GlobalIndex = typename MeshType::GlobalIndexType;
+public:
+   using MeshType = typename MeshFunction::MeshType;
+   using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >;
+   using GlobalIndex = typename MeshType::GlobalIndexType;
 
    static bool write( const MeshFunction& function,
                       std::ostream& str,
@@ -137,11 +135,10 @@ template< typename MeshFunction,
 class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 3, Real, Device, Index >, EntityDimension >
 : public MeshFunctionGnuplotWriterBase
 {
-   public:
-
-      using MeshType = typename MeshFunction::MeshType;
-      using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >;
-      using GlobalIndex = typename MeshType::GlobalIndexType;
+public:
+   using MeshType = typename MeshFunction::MeshType;
+   using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >;
+   using GlobalIndex = typename MeshType::GlobalIndexType;
 
    static bool write( const MeshFunction& function,
                       std::ostream& str,
@@ -167,6 +164,5 @@ class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 3, Real, Device, In
    }
 };
 
-
 } // namespace Functions
 } // namespace TNL
diff --git a/src/TNL/Functions/MeshFunctionVTKWriter.h b/src/TNL/Functions/MeshFunctionVTKWriter.h
index 78608de7461dc5510d280d29b9f7c329836e3eb8..201178c61197da4941f3c06af7914ec2428245b6 100644
--- a/src/TNL/Functions/MeshFunctionVTKWriter.h
+++ b/src/TNL/Functions/MeshFunctionVTKWriter.h
@@ -13,7 +13,7 @@
 #include <TNL/Meshes/Writers/VTKWriter.h>
 
 namespace TNL {
-namespace Functions {   
+namespace Functions {
 
 template< typename MeshFunction >
 class MeshFunctionVTKWriter
diff --git a/src/TNL/Functions/MeshFunction_impl.h b/src/TNL/Functions/MeshFunction_impl.h
index 908a31a09d5a69fa629ef0690df77ba00c17ea31..0131cbb25d653730f429306f8ae1a7226e4f41b9 100644
--- a/src/TNL/Functions/MeshFunction_impl.h
+++ b/src/TNL/Functions/MeshFunction_impl.h
@@ -92,30 +92,6 @@ MeshFunction( const MeshPointer& meshPointer,
    this->data.bind( *data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
 }
 
-template< typename Mesh,
-          int MeshEntityDimension,
-          typename Real >
-String
-MeshFunction< Mesh, MeshEntityDimension, Real >::
-getType()
-{
-   return String( "Functions::MeshFunction< " ) +
-                     Mesh::getType() + ", " +
-                     convertToString( MeshEntityDimension ) + ", " +
-                    TNL::getType< Real >() +
-                     " >";
-};
-
-template< typename Mesh,
-          int MeshEntityDimension,
-          typename Real >
-String
-MeshFunction< Mesh, MeshEntityDimension, Real >::
-getTypeVirtual() const
-{
-   return this->getType();
-};
-
 template< typename Mesh,
           int MeshEntityDimension,
           typename Real >
@@ -124,10 +100,10 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 getSerializationType()
 {
    return String( "Functions::MeshFunction< " ) +
-                     Mesh::getSerializationType() + ", " +
-                     convertToString( MeshEntityDimension ) + ", " +
-                    TNL::getType< Real >() +
-                     " >";
+          TNL::getSerializationType< Mesh >() + ", " +
+          convertToString( MeshEntityDimension ) + ", " +
+          getType< Real >() +
+          " >";
 };
 
 template< typename Mesh,
diff --git a/src/TNL/Functions/OperatorFunction.h b/src/TNL/Functions/OperatorFunction.h
index 1f1e89b029f5d5e816e6f6df4bc6d9e9d27bb377..cc46d557a10cbfc315d04275d613de0ce679dce0 100644
--- a/src/TNL/Functions/OperatorFunction.h
+++ b/src/TNL/Functions/OperatorFunction.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <type_traits>
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Solvers/PDE/BoundaryConditionsSetter.h>
 
diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h
index e2bdce1f1c4a72848e82f10d9c270099121c28b7..918f24107d0e4a27a24d414d6cdcbea4f885cb45 100644
--- a/src/TNL/Functions/TestFunction_impl.h
+++ b/src/TNL/Functions/TestFunction_impl.h
@@ -11,6 +11,8 @@
 #pragma once
 
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/MemoryHelpers.h>
+
 #include <TNL/Functions/Analytic/Constant.h>
 #include <TNL/Functions/Analytic/ExpBump.h>
 #include <TNL/Functions/Analytic/SinBumps.h>
@@ -137,7 +139,7 @@ setupFunction( const Config::ParameterContainer& parameters,
    }
    if( std::is_same< Device, Devices::Cuda >::value )
    {
-      this->function = Devices::Cuda::passToDevice( *auxFunction );
+      this->function = Cuda::passToDevice( *auxFunction );
       delete auxFunction;
       TNL_CHECK_CUDA_DEVICE;
    }
@@ -166,7 +168,7 @@ setupOperator( const Config::ParameterContainer& parameters,
    }
    if( std::is_same< Device, Devices::Cuda >::value )
    {
-      this->operator_ = Devices::Cuda::passToDevice( *auxOperator );
+      this->operator_ = Cuda::passToDevice( *auxOperator );
       delete auxOperator;
       TNL_CHECK_CUDA_DEVICE;
    }
@@ -736,7 +738,7 @@ deleteFunction()
    if( std::is_same< Device, Devices::Cuda >::value )
    {
       if( function )
-         Devices::Cuda::freeFromDevice( ( FunctionType * ) function );
+         Cuda::freeFromDevice( ( FunctionType * ) function );
    }
 }
 
@@ -756,7 +758,7 @@ deleteOperator()
    if( std::is_same< Device, Devices::Cuda >::value )
    {
       if( operator_ )
-         Devices::Cuda::freeFromDevice( ( OperatorType * ) operator_ );
+         Cuda::freeFromDevice( ( OperatorType * ) operator_ );
    }
 }
 
@@ -912,7 +914,7 @@ printFunction( std::ostream& str ) const
    }
    if( std::is_same< Device, Devices::Cuda >::value )
    {
-      Devices::Cuda::print( f, str );
+      Cuda::print( f, str );
       return str;
    }
 }
diff --git a/src/TNL/Functions/VectorField.h b/src/TNL/Functions/VectorField.h
index 4db601c9f4ccd9c003c46f41a112501953a73a76..4f06cd368f5ed55ef303cc53bcaadf3db7332fd1 100644
--- a/src/TNL/Functions/VectorField.h
+++ b/src/TNL/Functions/VectorField.h
@@ -113,19 +113,6 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, Real > >
             this->vectorField[ i ]->setMesh( meshPointer );
       };
       
-      static String getType()
-      {
-         return String( "Functions::VectorField< " ) +
-                  convertToString( Size) + ", " +
-                 FunctionType::getType() +
-                  " >";
-      }
- 
-      String getTypeVirtual() const
-      {
-         return this->getType();
-      }
- 
       static String getSerializationType()
       {
          return String( "Functions::VectorField< " ) +
diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter.h b/src/TNL/Functions/VectorFieldGnuplotWriter.h
index 41b59d511d680568d62ed545a8f230efc43dd575..a1a63883e8387b1bbe94cb463ae2d39286105570 100644
--- a/src/TNL/Functions/VectorFieldGnuplotWriter.h
+++ b/src/TNL/Functions/VectorFieldGnuplotWriter.h
@@ -16,15 +16,15 @@ namespace TNL {
 namespace Functions {
 
 template< int, typename > class VectorField;
+template< typename, int, typename > class MeshFunction;
 
 template< typename VectorField >
 class VectorFieldGnuplotWriter
 {
-   public:
-
-      static bool write( const VectorField& function,
-                         std::ostream& str,
-                         const double& scale );
+public:
+   static bool write( const VectorField& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 /***
@@ -37,14 +37,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 1, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 /***
@@ -57,14 +57,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 1, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 
@@ -78,14 +78,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 /***
@@ -98,14 +98,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 /***
@@ -118,14 +118,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 
@@ -139,14 +139,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 /***
@@ -159,14 +159,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 /***
@@ -179,14 +179,14 @@ template< typename MeshReal,
           int VectorFieldSize >
 class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >
 {
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
+public:
+   using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >;
+   using RealType = Real;
+   using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >;
+
+   static bool write( const VectorFieldType& function,
+                      std::ostream& str,
+                      const double& scale );
 };
 
 } // namespace Functions
diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h
index 500bdc4d8fbb0d3ece8c94ec6938c4c81a51b1c8..151ad5e7b178b7b0768ea794dbe0e03ca53b2f9a 100644
--- a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h
+++ b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h
@@ -23,7 +23,7 @@ write( const VectorField& vectorField,
        std::ostream& str,
        const double& scale  )
 {
-   std::cerr << "Gnuplot writer for mesh vectorFields defined on mesh type " << VectorField::MeshType::getType() << " is not (yet) implemented." << std::endl;
+   std::cerr << "Gnuplot writer for mesh vectorFields defined on mesh type " << getType< typename VectorField::MeshType >() << " is not (yet) implemented." << std::endl;
    return false;
 }
 
@@ -43,9 +43,8 @@ write( const VectorFieldType& vectorField,
 {
    const MeshType& mesh = vectorField.getMesh();
    typename MeshType::Cell entity( mesh );
-   for( entity.getCoordinates().x() = 0;
-        entity.getCoordinates().x() < mesh.getDimensions().x();
-        entity.getCoordinates().x() ++ )
+   auto& c = entity.getCoordinates();
+   for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ )
    {
       entity.refresh();
       typename MeshType::PointType v = entity.getCenter();
@@ -73,9 +72,8 @@ write( const VectorFieldType& vectorField,
 {
    const MeshType& mesh = vectorField.getMesh();
    typename MeshType::Vertex entity( mesh );
-   for( entity.getCoordinates().x() = 0;
-        entity.getCoordinates().x() <= mesh.getDimensions().x();
-        entity.getCoordinates().x() ++ )
+   auto& c = entity.getCoordinates();
+   for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ )
    {
       entity.refresh();
       typename MeshType::PointType v = entity.getCenter();
@@ -104,13 +102,10 @@ write( const VectorFieldType& vectorField,
 {
    const MeshType& mesh = vectorField.getMesh();
    typename MeshType::Cell entity( mesh );
-   for( entity.getCoordinates().y() = 0;
-        entity.getCoordinates().y() < mesh.getDimensions().y();
-        entity.getCoordinates().y() ++ )
+   auto& c = entity.getCoordinates();
+   for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ )
    {
-      for( entity.getCoordinates().x() = 0;
-           entity.getCoordinates().x() < mesh.getDimensions().x();
-           entity.getCoordinates().x() ++ )
+      for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ )
       {
          entity.refresh();
          typename MeshType::PointType v = entity.getCenter();
@@ -142,15 +137,12 @@ write( const VectorFieldType& vectorField,
    typedef typename MeshType::Face EntityType;
    typedef typename EntityType::EntityOrientationType EntityOrientation;
    EntityType entity( mesh );
+   auto& c = entity.getCoordinates();
 
    entity.setOrientation( EntityOrientation( 1.0, 0.0 ) );
-   for( entity.getCoordinates().y() = 0;
-        entity.getCoordinates().y() < mesh.getDimensions().y();
-        entity.getCoordinates().y() ++ )
+   for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ )
    {
-      for( entity.getCoordinates().x() = 0;
-           entity.getCoordinates().x() <= mesh.getDimensions().x();
-           entity.getCoordinates().x() ++ )
+      for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ )
       {
          entity.refresh();
          typename MeshType::PointType v = entity.getCenter();
@@ -163,15 +155,9 @@ write( const VectorFieldType& vectorField,
    }
 
    entity.setOrientation( EntityOrientation( 0.0, 1.0 ) );
-         for( entity.getCoordinates().x() = 0;
-           entity.getCoordinates().x() < mesh.getDimensions().x();
-           entity.getCoordinates().x() ++ )
-
+   for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ )
    {
-            for( entity.getCoordinates().y() = 0;
-        entity.getCoordinates().y() <= mesh.getDimensions().y();
-        entity.getCoordinates().y() ++ )
-
+      for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ )
       {
          entity.refresh();
          typename MeshType::PointType v = entity.getCenter();
@@ -202,13 +188,10 @@ write( const VectorFieldType& vectorField,
 {
    const MeshType& mesh = vectorField.getMesh();
    typename MeshType::Vertex entity( mesh );
-   for( entity.getCoordinates().y() = 0;
-        entity.getCoordinates().y() <= mesh.getDimensions().y();
-        entity.getCoordinates().y() ++ )
+   auto& c = entity.getCoordinates();
+   for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ )
    {
-      for( entity.getCoordinates().x() = 0;
-           entity.getCoordinates().x() <= mesh.getDimensions().x();
-           entity.getCoordinates().x() ++ )
+      for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ )
       {
          entity.refresh();
          typename MeshType::PointType v = entity.getCenter();
@@ -239,16 +222,11 @@ write( const VectorFieldType& vectorField,
 {
    const MeshType& mesh = vectorField.getMesh();
    typename MeshType::Cell entity( mesh );
-   for( entity.getCoordinates().z() = 0;
-        entity.getCoordinates().z() < mesh.getDimensions().z();
-        entity.getCoordinates().z() ++ )
-      for( entity.getCoordinates().y() = 0;
-           entity.getCoordinates().y() < mesh.getDimensions().y();
-           entity.getCoordinates().y() ++ )
+   auto& c = entity.getCoordinates();
+   for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ )
+      for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ )
       {
-         for( entity.getCoordinates().x() = 0;
-              entity.getCoordinates().x() < mesh.getDimensions().x();
-              entity.getCoordinates().x() ++ )
+         for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ )
          {
             entity.refresh();
             typename MeshType::PointType v = entity.getCenter();
@@ -280,18 +258,13 @@ write( const VectorFieldType& vectorField,
    typedef typename MeshType::Face EntityType;
    typedef typename EntityType::EntityOrientationType EntityOrientation;
    EntityType entity( mesh );
+   auto& c = entity.getCoordinates();
 
    entity.setOrientation( EntityOrientation( 1.0, 0.0, 0.0 ) );
-   for( entity.getCoordinates().z() = 0;
-        entity.getCoordinates().z() < mesh.getDimensions().z();
-        entity.getCoordinates().z() ++ )
-      for( entity.getCoordinates().y() = 0;
-           entity.getCoordinates().y() < mesh.getDimensions().y();
-           entity.getCoordinates().y() ++ )
+   for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ )
+      for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ )
       {
-         for( entity.getCoordinates().x() = 0;
-              entity.getCoordinates().x() <= mesh.getDimensions().x();
-              entity.getCoordinates().x() ++ )
+         for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ )
          {
             entity.refresh();
             typename MeshType::PointType v = entity.getCenter();
@@ -304,16 +277,10 @@ write( const VectorFieldType& vectorField,
       }
 
    entity.setOrientation( EntityOrientation( 0.0, 1.0, 0.0 ) );
-   for( entity.getCoordinates().z() = 0;
-        entity.getCoordinates().z() < mesh.getDimensions().z();
-        entity.getCoordinates().z() ++ )
-      for( entity.getCoordinates().x() = 0;
-           entity.getCoordinates().x() < mesh.getDimensions().x();
-           entity.getCoordinates().x() ++ )
+   for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ )
+      for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ )
       {
-         for( entity.getCoordinates().y() = 0;
-              entity.getCoordinates().y() <= mesh.getDimensions().y();
-              entity.getCoordinates().y() ++ )
+         for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ )
          {
             entity.refresh();
             typename MeshType::PointType v = entity.getCenter();
@@ -326,16 +293,10 @@ write( const VectorFieldType& vectorField,
       }
 
    entity.setOrientation( EntityOrientation( 0.0, 0.0, 1.0 ) );
-   for( entity.getCoordinates().x() = 0;
-        entity.getCoordinates().x() < mesh.getDimensions().x();
-        entity.getCoordinates().x() ++ )
-      for( entity.getCoordinates().y() = 0;
-           entity.getCoordinates().y() <= mesh.getDimensions().y();
-           entity.getCoordinates().y() ++ )
+   for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ )
+      for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ )
       {
-         for( entity.getCoordinates().z() = 0;
-              entity.getCoordinates().z() < mesh.getDimensions().z();
-              entity.getCoordinates().z() ++ )
+         for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ )
          {
             entity.refresh();
             typename MeshType::PointType v = entity.getCenter();
@@ -366,16 +327,11 @@ write( const VectorFieldType& vectorField,
 {
    const MeshType& mesh = vectorField.getMesh();
    typename MeshType::Vertex entity( mesh );
-   for( entity.getCoordinates().z() = 0;
-        entity.getCoordinates().z() <= mesh.getDimensions().z();
-        entity.getCoordinates().z() ++ )
-      for( entity.getCoordinates().y() = 0;
-           entity.getCoordinates().y() <= mesh.getDimensions().y();
-           entity.getCoordinates().y() ++ )
+   auto& c = entity.getCoordinates();
+   for( c.z() = 0; c.z() <= mesh.getDimensions().z(); c.z()++ )
+      for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ )
       {
-         for( entity.getCoordinates().x() = 0;
-              entity.getCoordinates().x() <= mesh.getDimensions().x();
-              entity.getCoordinates().x() ++ )
+         for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ )
          {
             entity.refresh();
             typename MeshType::PointType v = entity.getCenter();
@@ -391,4 +347,3 @@ write( const VectorFieldType& vectorField,
 
 } // namespace Functions
 } // namespace TNL
-
diff --git a/src/TNL/Functions/VectorFieldVTKWriter.h b/src/TNL/Functions/VectorFieldVTKWriter.h
index 6d8b1a8535b25e076c83e688706f37490086c39d..5eceea57fe21d52055294ca3f22a437988e39701 100644
--- a/src/TNL/Functions/VectorFieldVTKWriter.h
+++ b/src/TNL/Functions/VectorFieldVTKWriter.h
@@ -2,7 +2,7 @@
                           VectorFieldVTKWriter.h  -  description
                              -------------------
     begin                : Jan 10, 2018
-    copyright            : (C) 2018 by oberhuber
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -10,255 +10,52 @@
 
 #pragma once
 
-#include <TNL/Meshes/Grid.h>
+#include <TNL/Meshes/Writers/VTKWriter.h>
 
 namespace TNL {
 namespace Functions {
 
-template< int, typename > class VectorField;
-
 template< typename VectorField >
 class VectorFieldVTKWriter
 {
-   public:
-
-      static bool write( const VectorField& vectorField,
-                         std::ostream& str,
-                         const double& scale );
-      
-      static void writeHeader( const VectorField& vectorField,
-                               std::ostream& str ){}
-      
-};
-
-/***
- * 1D grids cells
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-/***
- * 1D grids vertices
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-
-/***
- * 2D grids cells
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-/***
- * 2D grids faces
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-/***
- * 2D grids vertices
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-
-/***
- * 3D grids cells
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-/***
- * 3D grids faces
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-/***
- * 3D grids edges
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
-};
-
-/***
- * 3D grids vertices
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >
-{
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef Real RealType;
-      typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType;
-      using VectorType = typename VectorFieldType::VectorType;
-
-      static bool write( const VectorFieldType& function,
-                         std::ostream& str,
-                         const double& scale  );
-      
-      static void writeHeader( const VectorFieldType& vectorField,
-                               std::ostream& str );
-      
+   using MeshType = typename VectorField::MeshType;
+   using MeshWriter = Meshes::Writers::VTKWriter< MeshType >;
+   using EntityType = typename MeshType::template EntityType< VectorField::getEntitiesDimension() >;
+   using GlobalIndex = typename MeshType::GlobalIndexType;
+
+public:
+   static bool write( const VectorField& field,
+                      std::ostream& str,
+                      const double& scale = 1.0,
+                      const String& fieldName = "cellVectorFieldValues" )
+   {
+      const MeshType& mesh = field.getMesh();
+      MeshWriter::template writeEntities< VectorField::getEntitiesDimension() >( mesh, str );
+      appendField( field, str, fieldName, scale );
+      return true;
+   }
+
+   // VTK supports writing multiple fields into the same file.
+   // You can call this after 'write', which initializes the mesh entities,
+   // with a different field name.
+   static void appendField( const VectorField& field,
+                            std::ostream& str,
+                            const String& fieldName,
+                            const double& scale = 1.0 )
+   {
+      const MeshType& mesh = field.getMesh();
+      const GlobalIndex entitiesCount = mesh.template getEntitiesCount< EntityType >();
+      str << std::endl << "CELL_DATA " << entitiesCount << std::endl;
+      str << "VECTORS " << fieldName << " " << getType< typename VectorField::RealType >() << std::endl;
+      for( GlobalIndex i = 0; i < entitiesCount; i++ ) {
+         const typename VectorField::VectorType vector = field.getElement( i );
+         static_assert( VectorField::getVectorDimension() <= 3, "The VTK format supports only up to 3D vector fields." );
+         for( int j = 0; j < 3; j++ )
+            str << scale * ( j < vector.getSize() ? vector[ j ] : 0.0 ) << " ";
+         str << "\n";
+      }
+   }
 };
 
 } // namespace Functions
 } // namespace TNL
-
-#include <TNL/Functions/VectorFieldVTKWriter_impl.h>
diff --git a/src/TNL/Functions/VectorFieldVTKWriter_impl.h b/src/TNL/Functions/VectorFieldVTKWriter_impl.h
deleted file mode 100644
index 938227d22b57f61d2f6c4d5f4b7b13a9044d3aa8..0000000000000000000000000000000000000000
--- a/src/TNL/Functions/VectorFieldVTKWriter_impl.h
+++ /dev/null
@@ -1,881 +0,0 @@
-/***************************************************************************
-                          VectorFieldVTKWriter_impl.h  -  description
-                             -------------------
-    begin                : Jan 10, 2018
-    copyright            : (C) 2018 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Functions/VectorFieldVTKWriter.h>
-#include <TNL/Functions/VectorField.h>
-
-namespace TNL {
-namespace Functions {   
-
-template< typename VectorField >
-bool
-VectorFieldVTKWriter< VectorField >::
-write( const VectorField& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   std::cerr << "VTK writer for vector field defined on mesh type " << VectorField::MeshType::getType() << " is not (yet) implemented." << std::endl;
-   return false;
-}
-
-/****
- * 1D grid, cells
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType origin = mesh.getOrigin().x();
-   const RealType spaceStep = mesh.getSpaceSteps().x();
- 
-   str << "POINTS " << mesh.getDimensions().x() + 1 << " float" << std::endl;
-   for (int i = 0; i <= mesh.getDimensions().x(); i++)
-   {
-       str << origin + i * spaceStep << " 0 0" << std::endl;
-   }
- 
-   str << std::endl << "CELLS " << mesh.getDimensions().x() << " " << mesh.getDimensions().x() * 3 << std::endl;
-   for (int i = 0; i < mesh.getDimensions().x(); i++)
-   {
-       str << "2 " << i << " " << i+1 << std::endl;
-   }
- 
-   str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() << std::endl;
-   for (int i = 0; i < mesh.getDimensions().x(); i++)
-   {
-       str << "3 " << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << mesh.getDimensions().x() << std::endl;
-   str << "VECTORS cellVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < mesh.template getEntitiesCount< typename MeshType::Cell >(); i++ )
-   {
-      typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
- 
-   return true;
-}
-
-/****
- * 1D grid, vertices
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType origin = mesh.getOrigin().x();
-   const RealType spaceStep = mesh.getSpaceSteps().x();
- 
-   str << "POINTS " << mesh.getDimensions().x() + 1 << " float" << std::endl;
-   for (int i = 0; i < mesh.getDimensions().x() + 1; i++)
-   {
-       str << origin + i * spaceStep << " 0 0" << std::endl;
-   }
- 
-   str << std::endl << "CELLS " << mesh.getDimensions().x() + 1 << " " << ( mesh.getDimensions().x() + 1 ) * 2 << std::endl;
-   for (int i = 0; i < mesh.getDimensions().x() + 1; i++)
-   {
-       str << "1 " << i << std::endl;
-   }
- 
-   str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() + 1 << std::endl;
-   for (int i = 0; i < mesh.getDimensions().x() + 1; i++)
-   {
-       str << "1 " << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << mesh.getDimensions().x() + 1 << std::endl;
-   str << "VECTORS VerticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < mesh.template getEntitiesCount< typename MeshType::Vertex >(); i++ )
-   {
-      typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
- 
-   return true;
-}
-
-/****
- * 2D grid, cells
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
-   const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Cell >();
-
-   str << "POINTS " << verticesCount << " " << getType< RealType >() << std::endl;
-   for (int j = 0; j < mesh.getDimensions().y() + 1; j++)
-   {
-        for (int i = 0; i < mesh.getDimensions().x() + 1; i++)
-        {
-             str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl;
-        }
-   }
- 
-   str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 5 << std::endl;
-   for (int j = 0; j < mesh.getDimensions().y(); j++)
-   {
-        for (int i = 0; i < mesh.getDimensions().x(); i++)
-        {
-            str << "4 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " << j * ( mesh.getDimensions().x() + 1 )+ i + 1 <<
-                   " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl;
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() * mesh.getDimensions().y() << std::endl;
-   for (int i = 0; i < mesh.getDimensions().x()*mesh.getDimensions().y(); i++)
-   {
-       str << "8 " << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << entitiesCount << std::endl;
-   str << "VECTORS cellVectorFieldValues " << getType< RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < entitiesCount; i++ )
-   {
-      typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-      {
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 );
-         if( i < 2 )
-            str << " ";
-      }
-      str << std::endl;
-   }
-
-   return true;
-}
-
-/****
- * 2D grid, faces
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   typedef typename MeshType::template EntityType< 0 > Vertex;
-   typedef typename MeshType::template EntityType< 1 > Face;
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
-   const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Face >();
- 
-   str << "POINTS " << verticesCount << " float" << std::endl;
-   for (int j = 0; j < ( mesh.getDimensions().y() + 1); j++)
-   {
-        for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++)
-        {
-             str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl;
-        }
-   }
- 
-   str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 3 << std::endl;
-   for (int j = 0; j < mesh.getDimensions().y(); j++)
-   {
-        for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++)
-        {
-            str << "2 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl;
-        }
-   }
- 
-   for (int j = 0; j < (mesh.getDimensions().y()+1); j++)
-   {
-        for (int i = 0; i < mesh.getDimensions().x(); i++)
-        {
-            str << "2 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " <<j * ( mesh.getDimensions().x() + 1 ) + i + 1<< std::endl;
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << entitiesCount << std::endl;
-   for (int i = 0; i < entitiesCount; i++)
-   {
-       str << "3" << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << entitiesCount << std::endl;
-   str << "VECTORS FaceslVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < entitiesCount; i++ )
-   {
-      typename MeshType::Face entity = mesh.template getEntity< typename MeshType::Face >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
-
-   return true;
-}
-
-/****
- * 2D grid, vertices
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   typedef typename MeshType::template EntityType< 0 > Vertex;
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
- 
-   str << "POINTS " << verticesCount << " float" << std::endl;
-   for (int j = 0; j < ( mesh.getDimensions().y() + 1); j++)
-   {
-        for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++)
-        {
-             str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl;
-        }
-   }
- 
-   str << std::endl << "CELLS " << verticesCount << " " << verticesCount * 2 << std::endl;
-   for (int j = 0; j < ( mesh.getDimensions().y() + 1 ); j++)
-   {
-        for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++)
-        {
-            str << "1 " << j * mesh.getDimensions().x() + i  << std::endl;
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << verticesCount << std::endl;
-   for (int i = 0; i < verticesCount; i++)
-   {
-       str << "1" << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << verticesCount << std::endl;
-   str << "VECTORS VerticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < verticesCount; i++ )
-   {
-      typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
-
-   return true;
-}
-
-/****
- * 3D grid, cells
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const RealType originZ = mesh.getOrigin().z();
-   const RealType spaceStepZ = mesh.getSpaceSteps().z();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
-   const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Cell >();
- 
-   str << "POINTS " << verticesCount << " float" << std::endl;
-   for (int k = 0; k <= mesh.getDimensions().y(); k++)
-   {
-       for (int j = 0; j <= mesh.getDimensions().y(); j++)
-       {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                 str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " <<
-                        originZ + k * spaceStepZ << std::endl;
-            }
-       }
-   }
- 
-   str << std::endl << "CELLS " << entitiesCount << " " <<
-          entitiesCount * 9 << std::endl;
-   for (int k = 0; k < mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j < mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i < mesh.getDimensions().x(); i++)
-            {
-                str << "8 " <<  k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl;
-            }
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << entitiesCount << std::endl;
-   for (int i = 0; i < entitiesCount; i++)
-   {
-       str << "11" << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << entitiesCount << std::endl;
-   str << "VECTORS cellVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < entitiesCount; i++ )
-   {
-      typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
-
-   return true;
-}
-
-/****
- * 3D grid, faces
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const RealType originZ = mesh.getOrigin().z();
-   const RealType spaceStepZ = mesh.getSpaceSteps().z();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
-   const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Face >();
- 
-   str << "POINTS " << verticesCount << " float" << std::endl;
-   for (int k = 0; k <= mesh.getDimensions().y(); k++)
-   {
-       for (int j = 0; j <= mesh.getDimensions().y(); j++)
-       {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                 str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " <<
-                        originZ + k * spaceStepZ << std::endl;
-            }
-       }
-   }
- 
-   str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 5 << std::endl;
-   for (int k = 0; k < mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j < mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl;
-            }
-        }
-   }
- 
-   for (int k = 0; k < mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j <= mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i < mesh.getDimensions().x(); i++)
-            {
-                str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl;
-            }
-        }
-   }
- 
-   for (int k = 0; k <= mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j < mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i < mesh.getDimensions().x(); i++)
-            {
-                str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1<< std::endl;
-            }
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << entitiesCount << std::endl;
-   for (int i = 0; i < entitiesCount; i++)
-   {
-       str << "8" << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << entitiesCount << std::endl;
-   str << "VECTORS facesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < entitiesCount; i++ )
-   {
-      typename MeshType::Face entity = mesh.template getEntity< typename MeshType::Face >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
-
-   return true;
-}
-
-/****
- * 3D grid, edges
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const RealType originZ = mesh.getOrigin().z();
-   const RealType spaceStepZ = mesh.getSpaceSteps().z();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
-   const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Edge >();
- 
-   str << "POINTS " << verticesCount << " float" << std::endl;
-   for (int k = 0; k <= mesh.getDimensions().y(); k++)
-   {
-       for (int j = 0; j <= mesh.getDimensions().y(); j++)
-       {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                 str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " <<
-                        originZ + k * spaceStepZ << std::endl;
-            }
-       }
-   }
- 
-   str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 3 << std::endl;
-   for (int k = 0; k <= mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j <= mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i < mesh.getDimensions().x(); i++)
-            {
-                str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl;
-            }
-        }
-   }
- 
-   for (int k = 0; k <= mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j < mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl;
-            }
-        }
-   }
- 
-   for (int k = 0; k < mesh.getDimensions().z(); k++)
-   {
-        for (int j = 0; j <= mesh.getDimensions().y(); j++)
-        {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " "
-                    << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << std::endl;
-            }
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << entitiesCount << std::endl;
-   for (int i = 0; i < entitiesCount; i++)
-   {
-       str << "3" << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << entitiesCount << std::endl;
-   str << "VECTORS edgesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < entitiesCount; i++ )
-   {
-      typename MeshType::Edge entity = mesh.template getEntity< typename MeshType::Edge >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
-
-   return true;
-}
-
-/****
- * 3D grid, vertices
- */
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-void
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >::
-writeHeader( const VectorFieldType& vectorField,
-             std::ostream& str )
-{
-    const MeshType& mesh = vectorField.getMesh();
-    const typename MeshType::PointType& origin = mesh.getOrigin();
-    const typename MeshType::PointType& proportions = mesh.getProportions();
-    str << "# vtk DataFile Version 2.0" << std::endl;
-    str << "TNL DATA" << std::endl;
-    str << "ASCII" << std::endl;
-    str << "DATASET UNSTRUCTURED_GRID" << std::endl;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          int VectorFieldSize >
-bool
-VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >::
-write( const VectorFieldType& vectorField,
-       std::ostream& str,
-       const double& scale )
-{
-   writeHeader(vectorField, str);
- 
-   const MeshType& mesh = vectorField.getMesh();
-   const RealType originX = mesh.getOrigin().x();
-   const RealType spaceStepX = mesh.getSpaceSteps().x();
-   const RealType originY = mesh.getOrigin().y();
-   const RealType spaceStepY = mesh.getSpaceSteps().y();
-   const RealType originZ = mesh.getOrigin().z();
-   const RealType spaceStepZ = mesh.getSpaceSteps().z();
-   const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >();
- 
-   str << "POINTS " << verticesCount << " float" << std::endl;
-   for (int k = 0; k <= mesh.getDimensions().y(); k++)
-   {
-       for (int j = 0; j <= mesh.getDimensions().y(); j++)
-       {
-            for (int i = 0; i <= mesh.getDimensions().x(); i++)
-            {
-                 str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " <<
-                        originZ + k * spaceStepZ << std::endl;
-            }
-       }
-   }
- 
-   str << std::endl << "CELLS " << verticesCount << " " << verticesCount * 2 << std::endl;
-   for (int k = 0; k < ( mesh.getDimensions().z() + 1 ); k++)
-   {
-        for (int j = 0; j < ( mesh.getDimensions().y() + 1 ); j++)
-        {
-            for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++)
-            {
-                str << "1 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i  << std::endl;
-            }
-        }
-   }
- 
-   str << std::endl << "CELL_TYPES " << verticesCount << std::endl;
-   for (int i = 0; i < verticesCount; i++)
-   {
-       str << "1" << std::endl;
-   }
- 
-   str << std::endl << "CELL_DATA " << verticesCount << std::endl;
-   str << "VECTORS verticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl;
-
-   for( MeshIndex i = 0; i < verticesCount; i++ )
-   {
-      typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i );
-      entity.refresh();
-      const VectorType v = vectorField.getElement( entity.getIndex() );
-      for( int i = 0; i < 3; i++ )
-         str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " ";
-      str << std::endl;
-   }
-
-   return true;
-}
-
-} // namespace Functions
-} // namespace TNL
diff --git a/src/TNL/Images/DicomSeries.h b/src/TNL/Images/DicomSeries.h
index 36e626ab6691a32b3c207d25aac294600b84ac65..b5aa77a57d3a2c3322cc7d79b6a31ff7c22e68d4 100644
--- a/src/TNL/Images/DicomSeries.h
+++ b/src/TNL/Images/DicomSeries.h
@@ -14,10 +14,11 @@
 
 #pragma once
 
+#include <list>
+
 #include <TNL/Containers/Array.h>
-#include <TNL/Containers/List.h>
 #include <TNL/String.h>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 #include <TNL/Images//Image.h>
 #include <TNL/Images//DicomHeader.h>
 #include <TNL/Images//RegionOfInterest.h>
@@ -33,10 +34,7 @@
 #include <string>
 
 namespace TNL {
-
-template<> inline String getType< Images::DicomHeader * > () { return String( "DicomHeader *" ); }
-
-namespace Images {   
+namespace Images {
 
 struct WindowCenterWidth
 {
@@ -105,7 +103,7 @@ class DicomSeries : public Image< int >
  
       bool loadImage( const String& filePath, int number );
 
-      Containers::List< String > fileList;
+      std::list< String > fileList;
  
       Containers::Array<DicomHeader *,Devices::Host,int> dicomSeriesHeaders;
 
diff --git a/src/TNL/Images/DicomSeries_impl.h b/src/TNL/Images/DicomSeries_impl.h
index 350bf384bbadae0eeb1045e1553010953f6a9390..533808b0d53559aee3c56fbd268194fafc7ecd34 100644
--- a/src/TNL/Images/DicomSeries_impl.h
+++ b/src/TNL/Images/DicomSeries_impl.h
@@ -155,22 +155,22 @@ inline bool DicomSeries::retrieveFileList( const String& filePath)
       String fileNamePrefix(fileName.getString(), 0, fileName.getLength() - separatorPosition);
 
       struct dirent **dirp;
-      Containers::List<String > files;
+      std::list< String > files;
 
       //scan and sort directory
       int ndirs = scandir(directoryPath.getString(), &dirp, filter, alphasort);
       for(int i = 0 ; i < ndirs; ++i)
       {
-         files.Append( String((char *)dirp[i]->d_name));
+         files.push_back( String((char *)dirp[i]->d_name) );
          delete dirp[i];
       }
 
-      for (int i = 0; i < files.getSize(); i++)
+      for (auto& file : files)
       {
          //check if file prefix contained
-         if (strstr(files[ i ].getString(), fileNamePrefix.getString()))
+         if (strstr(file.getString(), fileNamePrefix.getString()))
          {
-            fileList.Append( directoryPath + files[ i ] );
+            fileList.push_back( directoryPath + file );
          }
       }
    }
@@ -182,7 +182,7 @@ inline bool DicomSeries::loadImage( const String& filePath, int number)
 #ifdef HAVE_DCMTK_H
    //load header
    DicomHeader *header = new DicomHeader();
-   dicomSeriesHeaders.setSize( fileList.getSize() );
+   dicomSeriesHeaders.setSize( fileList.size() );
    dicomSeriesHeaders.setElement( number, header );
    if( !header->loadFromFile( filePath ) )
       return false;
@@ -283,7 +283,7 @@ inline bool DicomSeries::loadImage( const String& filePath, int number)
         imagesInfo.frameSize = size;
         if (pixelData)
             delete pixelData;
-        pixelData = new Uint16[imagesInfo.frameUintsCount * fileList.getSize()];
+        pixelData = new Uint16[imagesInfo.frameUintsCount * fileList.size()];
     }
     else
     {//check image size for compatibility
@@ -328,13 +328,14 @@ inline bool DicomSeries::loadDicomSeries( const String& filePath )
    }
 
    //load images
-   int imagesCountToLoad = fileList.getSize();
-   for( int i=0; i < imagesCountToLoad; i++ )
+   int counter = 0;
+   for( auto& file : fileList )
    {
-      if( !loadImage( fileList[ i ].getString(),i ) )
+      if( !loadImage( file.getString(), counter ) )
       {
-         std::cerr << fileList[ i ] << " skipped";
+         std::cerr << file << " skipped";
       }
+      counter++;
    }
    return true;
 }
diff --git a/src/TNL/Logger.h b/src/TNL/Logger.h
index d1f6c5c678c7c80c241e99f7d67d13f24403dabc..efcbbb3b5a1db18df8ae59827d30d13185d39188 100644
--- a/src/TNL/Logger.h
+++ b/src/TNL/Logger.h
@@ -12,6 +12,7 @@
 
 #include <ostream>
 
+#include <TNL/String.h>
 #include <TNL/Config/ParameterContainer.h>
 
 namespace TNL {
diff --git a/src/TNL/Logger_impl.h b/src/TNL/Logger_impl.h
index 0e1dd8dc62434faf07b64a95f1f896ed9b8af940..6f71b40277515ce60ffd3a09082e140731f1a4b5 100644
--- a/src/TNL/Logger_impl.h
+++ b/src/TNL/Logger_impl.h
@@ -14,8 +14,8 @@
 #include <iomanip>
 
 #include <TNL/Logger.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
-#include <TNL/Devices/SystemInfo.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/SystemInfo.h>
 
 namespace TNL {
 
@@ -61,24 +61,24 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters )
    const char* compiler_name = "(unknown)";
 #endif
 
-   writeParameter< String >( "Host name:", Devices::SystemInfo::getHostname() );
-   writeParameter< String >( "System:", Devices::SystemInfo::getSystemName() );
-   writeParameter< String >( "Release:", Devices::SystemInfo::getSystemRelease() );
-   writeParameter< String >( "Architecture:", Devices::SystemInfo::getArchitecture() );
+   writeParameter< String >( "Host name:", SystemInfo::getHostname() );
+   writeParameter< String >( "System:", SystemInfo::getSystemName() );
+   writeParameter< String >( "Release:", SystemInfo::getSystemRelease() );
+   writeParameter< String >( "Architecture:", SystemInfo::getArchitecture() );
    writeParameter< String >( "TNL compiler:", compiler_name );
    // FIXME: generalize for multi-socket systems, here we consider only the first found CPU
    const int cpu_id = 0;
-   const int threads = Devices::SystemInfo::getNumberOfThreads( cpu_id );
-   const int cores = Devices::SystemInfo::getNumberOfCores( cpu_id );
+   const int threads = SystemInfo::getNumberOfThreads( cpu_id );
+   const int cores = SystemInfo::getNumberOfCores( cpu_id );
    int threadsPerCore = 0;
    if( cores > 0 )
       threadsPerCore = threads / cores;
    writeParameter< String >( "CPU info", "" );
-   writeParameter< String >( "Model name:", Devices::SystemInfo::getCPUModelName( cpu_id ), 1 );
+   writeParameter< String >( "Model name:", SystemInfo::getCPUModelName( cpu_id ), 1 );
    writeParameter< int >( "Cores:", cores, 1 );
    writeParameter< int >( "Threads per core:", threadsPerCore, 1 );
-   writeParameter< double >( "Max clock rate (in MHz):", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 );
-   const Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
+   writeParameter< double >( "Max clock rate (in MHz):", SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 );
+   const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id );
    const String cacheInfo = convertToString( cacheSizes.L1data ) + ", "
                           + convertToString( cacheSizes.L1instruction ) + ", "
                           + convertToString( cacheSizes.L2 ) + ", "
@@ -95,19 +95,19 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters )
    //   for( int i = 0; i < devices; i++ )
    //   {
    //      logger.writeParameter< int >( "Device no.", i, 1 );
-         const int i = Devices::CudaDeviceInfo::getActiveDevice();
-         writeParameter< String >( "Name", Devices::CudaDeviceInfo::getDeviceName( i ), 2 );
-         const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( i ) ) + "." +
-                                   convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( i ) );
+         const int i = Cuda::DeviceInfo::getActiveDevice();
+         writeParameter< String >( "Name", Cuda::DeviceInfo::getDeviceName( i ), 2 );
+         const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( i ) ) + "." +
+                                   convertToString( Cuda::DeviceInfo::getArchitectureMinor( i ) );
          writeParameter< String >( "Architecture", deviceArch, 2 );
-         writeParameter< int >( "CUDA cores", Devices::CudaDeviceInfo::getCudaCores( i ), 2 );
-         const double clockRate = ( double ) Devices::CudaDeviceInfo::getClockRate( i ) / 1.0e3;
+         writeParameter< int >( "CUDA cores", Cuda::DeviceInfo::getCudaCores( i ), 2 );
+         const double clockRate = ( double ) Cuda::DeviceInfo::getClockRate( i ) / 1.0e3;
          writeParameter< double >( "Clock rate (in MHz)", clockRate, 2 );
-         const double globalMemory = ( double ) Devices::CudaDeviceInfo::getGlobalMemory( i ) / 1.0e9;
+         const double globalMemory = ( double ) Cuda::DeviceInfo::getGlobalMemory( i ) / 1.0e9;
          writeParameter< double >( "Global memory (in GB)", globalMemory, 2 );
-         const double memoryClockRate = ( double ) Devices::CudaDeviceInfo::getMemoryClockRate( i ) / 1.0e3;
+         const double memoryClockRate = ( double ) Cuda::DeviceInfo::getMemoryClockRate( i ) / 1.0e3;
          writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRate, 2 );
-         writeParameter< bool >( "ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( i ), 2 );
+         writeParameter< bool >( "ECC enabled", Cuda::DeviceInfo::getECCEnabled( i ), 2 );
    //   }
    }
    return true;
@@ -116,7 +116,7 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters )
 inline void
 Logger::writeCurrentTime( const char* label )
 {
-   writeParameter< String >( label, Devices::SystemInfo::getCurrentTime() );
+   writeParameter< String >( label, SystemInfo::getCurrentTime() );
 }
 
 template< typename T >
diff --git a/src/TNL/Math.h b/src/TNL/Math.h
index cd73b020e4c35fbe08c969864c9c26e400bd76ef..321cc7ce39e0d0beb8c3c3c2a5ab3ac7bbfddbdd 100644
--- a/src/TNL/Math.h
+++ b/src/TNL/Math.h
@@ -15,7 +15,7 @@
 #include <algorithm>
 
 #include <TNL/TypeTraits.h>
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
 
@@ -30,7 +30,7 @@ ResultType sum( const T1& a, const T2& b )
  * \brief This function returns minimum of two numbers.
  *
  * GPU device code uses the functions defined in the CUDA's math_functions.h,
- * MIC uses trivial override and host uses the STL functions.
+ * host uses the STL functions.
  */
 template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type,
           // enable_if is necessary to avoid ambiguity in vector expressions
@@ -44,8 +44,6 @@ ResultType min( const T1& a, const T2& b )
 #else
  #if defined(__CUDA_ARCH__)
    return ::min( (ResultType) a, (ResultType) b );
- #elif defined(__MIC__)
-   return a < b ? a : b;
  #else
    return std::min( (ResultType) a, (ResultType) b );
  #endif
@@ -57,7 +55,7 @@ ResultType min( const T1& a, const T2& b )
  * \brief This function returns maximum of two numbers.
  *
  * GPU device code uses the functions defined in the CUDA's math_functions.h,
- * MIC uses trivial override and host uses the STL functions.
+ * host uses the STL functions.
  */
 template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type,
           // enable_if is necessary to avoid ambiguity in vector expressions
@@ -71,8 +69,6 @@ ResultType max( const T1& a, const T2& b )
 #else
  #if defined(__CUDA_ARCH__)
    return ::max( (ResultType) a, (ResultType) b );
- #elif defined(__MIC__)
-   return a > b ? a : b;
  #else
    return std::max( (ResultType) a, (ResultType) b );
  #endif
@@ -92,10 +88,6 @@ T abs( const T& n )
       return ::abs( n );
    else
       return ::fabs( n );
-#elif defined(__MIC__)
-   if( n < ( T ) 0 )
-      return -n;
-   return n;
 #else
    return std::abs( n );
 #endif
@@ -159,7 +151,7 @@ template< typename T1, typename T2, typename ResultType = typename std::common_t
 __cuda_callable__ inline
 ResultType pow( const T1& base, const T2& exp )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::pow( (ResultType) base, (ResultType) exp );
 #else
    return std::pow( (ResultType) base, (ResultType) exp );
@@ -173,7 +165,7 @@ template< typename T >
 __cuda_callable__ inline
 auto exp( const T& value ) -> decltype( std::exp(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::exp( value );
 #else
    return std::exp( value );
@@ -187,7 +179,7 @@ template< typename T >
 __cuda_callable__ inline
 auto sqrt( const T& value ) -> decltype( std::sqrt(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::sqrt( value );
 #else
    return std::sqrt( value );
@@ -201,7 +193,7 @@ template< typename T >
 __cuda_callable__ inline
 auto cbrt( const T& value ) -> decltype( std::cbrt(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::cbrt( value );
 #else
    return std::cbrt( value );
@@ -215,7 +207,7 @@ template< typename T >
 __cuda_callable__ inline
 auto log( const T& value ) -> decltype( std::log(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::log( value );
 #else
    return std::log( value );
@@ -229,7 +221,7 @@ template< typename T >
 __cuda_callable__ inline
 auto log10( const T& value ) -> decltype( std::log10(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::log10( value );
 #else
    return std::log10( value );
@@ -243,7 +235,7 @@ template< typename T >
 __cuda_callable__ inline
 auto log2( const T& value ) -> decltype( std::log2(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::log2( value );
 #else
    return std::log2( value );
@@ -257,7 +249,7 @@ template< typename T >
 __cuda_callable__ inline
 auto sin( const T& value ) -> decltype( std::sin(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::sin( value );
 #else
    return std::sin( value );
@@ -271,7 +263,7 @@ template< typename T >
 __cuda_callable__ inline
 auto cos( const T& value ) -> decltype( std::cos(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::cos( value );
 #else
    return std::cos( value );
@@ -285,7 +277,7 @@ template< typename T >
 __cuda_callable__ inline
 auto tan( const T& value ) -> decltype( std::tan(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::tan( value );
 #else
    return std::tan( value );
@@ -299,7 +291,7 @@ template< typename T >
 __cuda_callable__ inline
 auto asin( const T& value ) -> decltype( std::asin(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::asin( value );
 #else
    return std::asin( value );
@@ -313,7 +305,7 @@ template< typename T >
 __cuda_callable__ inline
 auto acos( const T& value ) -> decltype( std::acos(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::acos( value );
 #else
    return std::acos( value );
@@ -327,7 +319,7 @@ template< typename T >
 __cuda_callable__ inline
 auto atan( const T& value ) -> decltype( std::atan(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::atan( value );
 #else
    return std::atan( value );
@@ -341,7 +333,7 @@ template< typename T >
 __cuda_callable__ inline
 auto sinh( const T& value ) -> decltype( std::sinh(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::sinh( value );
 #else
    return std::sinh( value );
@@ -355,7 +347,7 @@ template< typename T >
 __cuda_callable__ inline
 auto cosh( const T& value ) -> decltype( std::cosh(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::cosh( value );
 #else
    return std::cosh( value );
@@ -369,7 +361,7 @@ template< typename T >
 __cuda_callable__ inline
 auto tanh( const T& value ) -> decltype( std::tanh(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::tanh( value );
 #else
    return std::tanh( value );
@@ -383,7 +375,7 @@ template< typename T >
 __cuda_callable__ inline
 auto asinh( const T& value ) -> decltype( std::asinh(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::asinh( value );
 #else
    return std::asinh( value );
@@ -397,7 +389,7 @@ template< typename T >
 __cuda_callable__ inline
 auto acosh( const T& value ) -> decltype( std::acosh(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::acosh( value );
 #else
    return std::acosh( value );
@@ -411,7 +403,7 @@ template< typename T >
 __cuda_callable__ inline
 auto atanh( const T& value ) -> decltype( std::atanh(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::atanh( value );
 #else
    return std::atanh( value );
@@ -425,7 +417,7 @@ template< typename T >
 __cuda_callable__ inline
 auto floor( const T& value ) -> decltype( std::floor(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::floor( value );
 #else
    return std::floor( value );
@@ -439,7 +431,7 @@ template< typename T >
 __cuda_callable__ inline
 auto ceil( const T& value ) -> decltype( std::ceil(value) )
 {
-#if defined(__CUDA_ARCH__) || defined(__MIC__)
+#if defined(__CUDA_ARCH__)
    return ::ceil( value );
 #else
    return std::ceil( value );
diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h
index dd6618d5e40e481e453c46b29b0988a067046b76..a50a17232fb184086930e69834ea4d498d9ab5a2 100644
--- a/src/TNL/Matrices/AdEllpack.h
+++ b/src/TNL/Matrices/AdEllpack.h
@@ -84,14 +84,13 @@ public:
     typedef Index IndexType;
     typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
     typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-    typedef AdEllpack< Real, Devices::Host, Index > HostType;
-    typedef AdEllpack< Real, Devices::Cuda, Index > CudaType;
 
-    AdEllpack();
-
-    static String getType();
+    template< typename _Real = Real,
+              typename _Device = Device,
+              typename _Index = Index >
+    using Self = AdEllpack< _Real, _Device, _Index >;
 
-    String getTypeVirtual() const;
+    AdEllpack();
 
     void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h
index 12d7336b8f7ea3b46ace0df151475df354537ced..a0f293b3df94afcfeda8124f8e1d8173cb4c7718 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/AdEllpack_impl.h
@@ -11,7 +11,7 @@
 #include <TNL/Matrices/AdEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 
 #pragma once
 
@@ -157,26 +157,6 @@ AdEllpack< Real, Device, Index >::AdEllpack()
 warpSize( 32 )
 {}
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String AdEllpack< Real, Device, Index >::getTypeVirtual() const
-{
-    return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String AdEllpack< Real, Device, Index >::getType()
-{
-    return String( "AdEllpack< ") +
-           String( TNL::getType< Real >() ) +
-           String( ", " ) +
-           Device::getDeviceType() +
-           String( " >" );
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -956,14 +936,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda2( const InVector& inVector,
                                                   OutVector& outVector,
                                                   const int gridIdx ) const
 {
-    IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     IndexType warpIdx = globalIdx >> 5;
     IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 );
     if( globalIdx >= this->reduceMap.getSize() )
 	return;
 
     const int blockSize = 256;
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ IndexType reduceMap[ blockSize ];
     reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ];
     temp[ threadIdx.x ] = 0.0;
@@ -1004,14 +984,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda4( const InVector& inVector,
                                                            OutVector& outVector,
                                                            const int gridIdx ) const
 {
-    IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     IndexType warpIdx = globalIdx >> 5;
     IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 );
     if( globalIdx >= this->reduceMap.getSize() )
 	return;
 
     const int blockSize = 192;
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ IndexType reduceMap[ blockSize ];
     reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ];
     temp[ threadIdx.x ] = 0.0;
@@ -1063,14 +1043,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector,
                                                            OutVector& outVector,
                                                            const int gridIdx ) const
 {
-    IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     IndexType warpIdx = globalIdx >> 5;
     IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 );
     if( globalIdx >= this->reduceMap.getSize() )
 	return;
 
     const int blockSize = 128;
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ IndexType reduceMap[ blockSize ];
     reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ];
     temp[ threadIdx.x ] = 0.0;
@@ -1121,14 +1101,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda16( const InVector& inVector,
                                                             OutVector& outVector,
                                                             const int gridIdx ) const
 {
-    IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     IndexType warpIdx = globalIdx >> 5;
     IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 );
     if( globalIdx >= this->reduceMap.getSize() )
 	return;
 
     const int blockSize = 128;
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ IndexType reduceMap[ blockSize ];
     reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ];
     temp[ threadIdx.x ] = 0.0;
@@ -1179,14 +1159,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda32( const InVector& inVector,
                                                             OutVector& outVector,
                                                             const int gridIdx ) const
 {
-    IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     IndexType warpIdx = globalIdx >> 5;
     IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 );
     if( globalIdx >= this->reduceMap.getSize() )
 	return;
 
     const int blockSize = 96;
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ IndexType reduceMap[ blockSize ];
     reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ];
     temp[ threadIdx.x ] = 0.0;
@@ -1312,18 +1292,18 @@ public:
     {
         typedef AdEllpack< Real, Devices::Cuda, Index > Matrix;
 	typedef typename Matrix::IndexType IndexType;
-	Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-	InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-	OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
+	Matrix* kernel_this = Cuda::passToDevice( matrix );
+	InVector* kernel_inVector = Cuda::passToDevice( inVector );
+	OutVector* kernel_outVector = Cuda::passToDevice( outVector );
 	if( matrix.totalLoad < 2 )
 	{
-	    dim3 blockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+	    dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
 	    IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
-	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 	    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 	    {
 	        if( gridIdx == cudaGrids - 1 )
-		    cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		    cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 	        const int sharedMemory = blockSize.x * sizeof( Real );
 	        AdEllpackVectorProductCuda2< Real, Index, InVector, OutVector >
                                                     <<< cudaGridSize, blockSize, sharedMemory >>>
@@ -1333,20 +1313,20 @@ public:
                                                       gridIdx );
 	    }
 	    TNL_CHECK_CUDA_DEVICE;
-	    Devices::Cuda::freeFromDevice( kernel_this );
-	    Devices::Cuda::freeFromDevice( kernel_inVector );
-	    Devices::Cuda::freeFromDevice( kernel_outVector );
+	    Cuda::freeFromDevice( kernel_this );
+	    Cuda::freeFromDevice( kernel_inVector );
+	    Cuda::freeFromDevice( kernel_outVector );
 	    TNL_CHECK_CUDA_DEVICE;
 	}
 	else if( matrix.totalLoad < 4 )
 	{
-	    dim3 blockSize( 192 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+	    dim3 blockSize( 192 ), cudaGridSize( Cuda::getMaxGridSize() );
 	    IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
-	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 	    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 	    {
 	        if( gridIdx == cudaGrids - 1 )
-		    cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		    cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 	        const int sharedMemory = blockSize.x * sizeof( Real );
 	        AdEllpackVectorProductCuda4< Real, Index, InVector, OutVector >
                                                     <<< cudaGridSize, blockSize, sharedMemory >>>
@@ -1356,20 +1336,20 @@ public:
                                                       gridIdx );
 	    }
 	    TNL_CHECK_CUDA_DEVICE;
-	    Devices::Cuda::freeFromDevice( kernel_this );
-	    Devices::Cuda::freeFromDevice( kernel_inVector );
-	    Devices::Cuda::freeFromDevice( kernel_outVector );
+	    Cuda::freeFromDevice( kernel_this );
+	    Cuda::freeFromDevice( kernel_inVector );
+	    Cuda::freeFromDevice( kernel_outVector );
 	    TNL_CHECK_CUDA_DEVICE;
 	}
 	else if( matrix.totalLoad < 8 )
 	{
-	    dim3 blockSize( 128 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+	    dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() );
 	    IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
-	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 	    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 	    {
 	        if( gridIdx == cudaGrids - 1 )
-		    cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		    cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 	        const int sharedMemory = blockSize.x * sizeof( Real );
 	        AdEllpackVectorProductCuda8< Real, Index, InVector, OutVector >
                                                     <<< cudaGridSize, blockSize, sharedMemory >>>
@@ -1379,20 +1359,20 @@ public:
                                                       gridIdx );
 	    }
 	    TNL_CHECK_CUDA_DEVICE;
-	    Devices::Cuda::freeFromDevice( kernel_this );
-	    Devices::Cuda::freeFromDevice( kernel_inVector );
-	    Devices::Cuda::freeFromDevice( kernel_outVector );
+	    Cuda::freeFromDevice( kernel_this );
+	    Cuda::freeFromDevice( kernel_inVector );
+	    Cuda::freeFromDevice( kernel_outVector );
 	    TNL_CHECK_CUDA_DEVICE;
 	}
 	else if( matrix.totalLoad < 16 )
 	{
-	    dim3 blockSize( 128 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+	    dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() );
 	    IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
-	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 	    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 	    {
 	        if( gridIdx == cudaGrids - 1 )
-		    cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		    cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 	        const int sharedMemory = blockSize.x * sizeof( Real );
 	        AdEllpackVectorProductCuda16< Real, Index, InVector, OutVector >
                                                      <<< cudaGridSize, blockSize, sharedMemory >>>
@@ -1402,20 +1382,20 @@ public:
                                                        gridIdx );
 	    }
 	    TNL_CHECK_CUDA_DEVICE;
-	    Devices::Cuda::freeFromDevice( kernel_this );
-	    Devices::Cuda::freeFromDevice( kernel_inVector );
-	    Devices::Cuda::freeFromDevice( kernel_outVector );
+	    Cuda::freeFromDevice( kernel_this );
+	    Cuda::freeFromDevice( kernel_inVector );
+	    Cuda::freeFromDevice( kernel_outVector );
 	    TNL_CHECK_CUDA_DEVICE;
 	}
 	else
 	{
-	    dim3 blockSize( 96 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+	    dim3 blockSize( 96 ), cudaGridSize( Cuda::getMaxGridSize() );
 	    IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
-	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+	    IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 	    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 	    {
 	        if( gridIdx == cudaGrids - 1 )
-		    cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		    cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 	        const int sharedMemory = blockSize.x * sizeof( Real );
 	        AdEllpackVectorProductCuda32< Real, Index, InVector, OutVector >
                                                      <<< cudaGridSize, blockSize, sharedMemory >>>
@@ -1425,9 +1405,9 @@ public:
                                                        gridIdx );
 	    }
 	    TNL_CHECK_CUDA_DEVICE;
-	    Devices::Cuda::freeFromDevice( kernel_this );
-	    Devices::Cuda::freeFromDevice( kernel_inVector );
-	    Devices::Cuda::freeFromDevice( kernel_outVector );
+	    Cuda::freeFromDevice( kernel_this );
+	    Cuda::freeFromDevice( kernel_inVector );
+	    Cuda::freeFromDevice( kernel_outVector );
 	    TNL_CHECK_CUDA_DEVICE;
 	}
     }
diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/BiEllpack.h
index 08bb5366671ebbf6abb208d213027914a765ad2e..cfc132ccd56318a0c160d6eab943fc5de90b7c7c 100644
--- a/src/TNL/Matrices/BiEllpack.h
+++ b/src/TNL/Matrices/BiEllpack.h
@@ -39,14 +39,13 @@ public:
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-	typedef BiEllpack< Real, Devices::Host, Index > HostType;
-	typedef BiEllpack< Real, Devices::Cuda, Index > CudaType;
 
-	BiEllpack();
-
-	static String getType();
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = BiEllpack< _Real, _Device, _Index >;
 
-	String getTypeVirtual() const;
+	BiEllpack();
 
 	void setDimensions( const IndexType rows,
 	                    const IndexType columns );
diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/BiEllpackSymmetric.h
index 0d2ae9f1e64d42a13d6849e185048b974a9ac61b..8a845a08372c0647af36c911ec59b79e6e857747 100644
--- a/src/TNL/Matrices/BiEllpackSymmetric.h
+++ b/src/TNL/Matrices/BiEllpackSymmetric.h
@@ -30,14 +30,13 @@ public:
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-	typedef BiEllpackSymmetric< Real, Devices::Host, Index > HostType;
-	typedef BiEllpackSymmetric< Real, Devices::Cuda, Index > CudaType;
 
-	BiEllpackSymmetric();
-
-	static String getType();
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = BiEllpackSymmetric< _Real, _Device, _Index >;
 
-	String getTypeVirtual() const;
+	BiEllpackSymmetric();
 
 	void setDimensions( const IndexType rows, const IndexType columns );
 
diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/BiEllpackSymmetric_impl.h
index d4c755a236fa9833dbed19d7f70223cc6c7a0608..0af180c0e8c2c54d2c4fdb304fa3e2813d76786c 100644
--- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h
+++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h
@@ -45,28 +45,6 @@ BiEllpackSymmetric< Real, Device, Index, StripSize >::BiEllpackSymmetric()
   logWarpSize( 5 )
 {}
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          int StripSize >
-String BiEllpackSymmetric< Real, Device, Index, StripSize >::getType()
-{
-    return String( "BiEllpackMatrix< ") +
-           String( TNL::getType< Real >() ) +
-           String( ", " ) +
-           Device :: getDeviceType() +
-           String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          int StripSize >
-String BiEllpackSymmetric< Real, Device, Index, StripSize >::getTypeVirtual() const
-{
-    return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -1075,7 +1053,7 @@ void BiEllpackSymmetric< Real, Device, Index, StripSize >::spmvCuda( const InVec
     IndexType bisection = this->warpSize;
     IndexType groupBegin = strip * ( this->logWarpSize + 1 );
 
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ Real results[ cudaBlockSize ];
     results[ threadIdx.x ] = 0.0;
     IndexType elementPtr = ( this->groupPointers[ groupBegin ] << this->logWarpSize ) + inWarpIdx;
@@ -1296,7 +1274,7 @@ void BiEllpackSymmetricVectorProductCuda( const BiEllpackSymmetric< Real, Device
                                           int gridIdx,
                                           const int warpSize )
 {
-    Index globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    Index globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     matrix->spmvCuda( *inVector, *outVector, globalIdx );
 }
 #endif
@@ -1416,7 +1394,7 @@ void performRowBubbleSortCuda( BiEllpackSymmetric< Real, Devices::Cuda, Index, S
                                const typename BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize >::RowLengthsVector* rowLengths,
                                int gridIdx )
 {
-    const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+    const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
     matrix->performRowBubbleSortCudaKernel( *rowLengths, stripIdx );
 }
 #endif
@@ -1431,7 +1409,7 @@ void computeColumnSizesCuda( BiEllpackSymmetric< Real, Devices::Cuda, Index, Str
                              const Index numberOfStrips,
                              int gridIdx )
 {
-    const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+    const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
     matrix->computeColumnSizesCudaKernel( *rowLengths, numberOfStrips, stripIdx );
 }
 #endif
@@ -1535,23 +1513,23 @@ public:
         Index numberOfStrips = matrix.virtualRows / StripSize;
         typedef BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize > Matrix;
         typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector;
-        Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-        CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths );
-        dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+        Matrix* kernel_this = Cuda::passToDevice( matrix );
+        CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths );
+        dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
         const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x );
-        const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+        const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
         for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
         {
              if( gridIdx == cudaGrids - 1 )
-                 cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                 cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
              performRowBubbleSortCuda< Real, Index, StripSize >
                                      <<< cudaGridSize, cudaBlockSize >>>
                                      ( kernel_this,
                                        kernel_rowLengths,
                                        gridIdx );
         }
-        Devices::Cuda::freeFromDevice( kernel_this );
-        Devices::Cuda::freeFromDevice( kernel_rowLengths );
+        Cuda::freeFromDevice( kernel_this );
+        Cuda::freeFromDevice( kernel_rowLengths );
         TNL_CHECK_CUDA_DEVICE;
 #endif
     }
@@ -1566,15 +1544,15 @@ public:
         const Index numberOfStrips = matrix.virtualRows / StripSize;
         typedef BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize > Matrix;
         typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector;
-        Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-        CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths );
-        dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+        Matrix* kernel_this = Cuda::passToDevice( matrix );
+        CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths );
+        dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
         const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x );
-        const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+        const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
         for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
         {
              if( gridIdx == cudaGrids - 1 )
-                 cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                 cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
              computeColumnSizesCuda< Real, Index, StripSize >
                                    <<< cudaGridSize, cudaBlockSize >>>
                                    ( kernel_this,
@@ -1582,8 +1560,8 @@ public:
                                      numberOfStrips,
                                      gridIdx );
         }
-        Devices::Cuda::freeFromDevice( kernel_this );
-        Devices::Cuda::freeFromDevice( kernel_rowLengths );
+        Cuda::freeFromDevice( kernel_this );
+        Cuda::freeFromDevice( kernel_rowLengths );
         TNL_CHECK_CUDA_DEVICE;
 #endif
     }
@@ -1601,16 +1579,16 @@ public:
 #ifdef HAVE_CUDA
         typedef BiEllpackSymmetric< Real, Devices::Cuda, Index > Matrix;
         typedef typename Matrix::IndexType IndexType;
-        Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-        InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-        OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-        dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+        Matrix* kernel_this = Cuda::passToDevice( matrix );
+        InVector* kernel_inVector = Cuda::passToDevice( inVector );
+        OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+        dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
         const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-        const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+        const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
         for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
         {
             if( gridIdx == cudaGrids - 1 )
-                cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
             const int sharedMemory = cudaBlockSize.x * sizeof( Real );
             BiEllpackSymmetricVectorProductCuda< Real, Index, StripSize, InVector, OutVector >
                                                <<< cudaGridSize, cudaBlockSize, sharedMemory >>>
@@ -1620,9 +1598,9 @@ public:
                                                  gridIdx,
                                                  matrix.warpSize );
         }
-        Devices::Cuda::freeFromDevice( kernel_this );
-        Devices::Cuda::freeFromDevice( kernel_inVector );
-        Devices::Cuda::freeFromDevice( kernel_outVector );
+        Cuda::freeFromDevice( kernel_this );
+        Cuda::freeFromDevice( kernel_inVector );
+        Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
 #endif
     }
diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h
index 0be6ac4b068111b8dbb424142e16ed19f6de2477..51646152e8e62d8c26fb55a961869b8acef7826e 100644
--- a/src/TNL/Matrices/BiEllpack_impl.h
+++ b/src/TNL/Matrices/BiEllpack_impl.h
@@ -47,28 +47,6 @@ BiEllpack< Real, Device, Index, StripSize >::BiEllpack()
   logWarpSize( 5 )
 {}
 
-template< typename Real,
-	  typename Device,
-	  typename Index,
-	  int StripSize >
-String BiEllpack< Real, Device, Index, StripSize >::getType()
-{
-	return String( "BiEllpack< ") +
-	       String( TNL::getType< Real >() ) +
-	       String( ", " ) +
-	       Device::getDeviceType() +
-	       String( " >" );
-}
-
-template< typename Real,
-	  typename Device,
-	  typename Index,
-	  int StripSize >
-String BiEllpack< Real, Device, Index, StripSize >::getTypeVirtual() const
-{
-    return this->getType();
-}
-
 template< typename Real,
 	  typename Device,
 	  typename Index,
@@ -116,7 +94,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 	//DeviceDependentCode::performRowBubbleSort( *this, rowLengths );
 	//DeviceDependentCode::computeColumnSizes( *this, rowLengths );
 
-	this->groupPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >();
+	this->groupPointers.template scan< Algorithms::ScanType::Exclusive >();
 
 	// uncomment to perform structure test
 	//DeviceDependentCode::verifyRowPerm( *this, rowLengths );
@@ -1079,7 +1057,7 @@ void BiEllpack< Real, Device, Index, StripSize >::spmvCuda( const InVector& inVe
     IndexType bisection = this->warpSize;
     IndexType groupBegin = strip * ( this->logWarpSize + 1 );
 
-    Real* temp = Devices::Cuda::getSharedMemory< Real >();
+    Real* temp = Cuda::getSharedMemory< Real >();
     __shared__ Real results[ cudaBlockSize ];
     results[ threadIdx.x ] = 0.0;
     IndexType elementPtr = ( this->groupPointers[ groupBegin ] << this->logWarpSize ) + inWarpIdx;
@@ -1299,7 +1277,7 @@ void BiEllpackVectorProductCuda( const BiEllpack< Real, Devices::Cuda, Index, St
 										  int gridIdx,
 										  const int warpSize )
 {
-	Index globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+	Index globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
 	matrix->spmvCuda( *inVector, *outVector, globalIdx );
 }
 #endif
@@ -1419,7 +1397,7 @@ void performRowBubbleSortCuda( BiEllpack< Real, Devices::Cuda, Index, StripSize
 							   const typename BiEllpack< Real, Devices::Cuda, Index, StripSize >::CompressedRowLengthsVector* rowLengths,
 							   int gridIdx )
 {
-	const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+	const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
 	matrix->performRowBubbleSortCudaKernel( *rowLengths, stripIdx );
 }
 #endif
@@ -1434,7 +1412,7 @@ void computeColumnSizesCuda( BiEllpack< Real, Devices::Cuda, Index, StripSize >*
 							 const Index numberOfStrips,
 							 int gridIdx )
 {
-	const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+	const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
 	matrix->computeColumnSizesCudaKernel( *rowLengths, numberOfStrips, stripIdx );
 }
 #endif
@@ -1538,23 +1516,23 @@ public:
 		Index numberOfStrips = matrix.virtualRows / StripSize;
 		typedef BiEllpack< Real, Devices::Cuda, Index, StripSize > Matrix;
 		typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector;
-		Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-		CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths );
-		dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+		Matrix* kernel_this = Cuda::passToDevice( matrix );
+		CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths );
+		dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
 		const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x );
-		const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+		const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 		for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 		{
 		     if( gridIdx == cudaGrids - 1 )
-		         cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 		     performRowBubbleSortCuda< Real, Index, StripSize >
 		     	 	 	 	 	 	 <<< cudaGridSize, cudaBlockSize >>>
 		                             ( kernel_this,
 		                               kernel_rowLengths,
 		                               gridIdx );
 		}
-		Devices::Cuda::freeFromDevice( kernel_this );
-		Devices::Cuda::freeFromDevice( kernel_rowLengths );
+		Cuda::freeFromDevice( kernel_this );
+		Cuda::freeFromDevice( kernel_rowLengths );
 		TNL_CHECK_CUDA_DEVICE;
 #endif
 	}
@@ -1569,15 +1547,15 @@ public:
 		const Index numberOfStrips = matrix.virtualRows / StripSize;
 		typedef BiEllpack< Real, Devices::Cuda, Index, StripSize > Matrix;
 		typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector;
-		Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-		CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths );
-		dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+		Matrix* kernel_this = Cuda::passToDevice( matrix );
+		CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths );
+		dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
 		const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x );
-		const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+		const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 		for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 		{
 		     if( gridIdx == cudaGrids - 1 )
-		         cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+		         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 		     computeColumnSizesCuda< Real, Index, StripSize >
 		     	 	 	 	 	   <<< cudaGridSize, cudaBlockSize >>>
 		                           ( kernel_this,
@@ -1585,8 +1563,8 @@ public:
 		                             numberOfStrips,
 		                             gridIdx );
         }
-		Devices::Cuda::freeFromDevice( kernel_this );
-		Devices::Cuda::freeFromDevice( kernel_rowLengths );
+		Cuda::freeFromDevice( kernel_this );
+		Cuda::freeFromDevice( kernel_rowLengths );
 		TNL_CHECK_CUDA_DEVICE;
 #endif
 	}
@@ -1604,16 +1582,16 @@ public:
 #ifdef HAVE_CUDA
 		typedef BiEllpack< Real, Devices::Cuda, Index > Matrix;
 		typedef typename Matrix::IndexType IndexType;
-		Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-		InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-		OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-		dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+		Matrix* kernel_this = Cuda::passToDevice( matrix );
+		InVector* kernel_inVector = Cuda::passToDevice( inVector );
+		OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+		dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
 		const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-		const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+		const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
 		for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
 		{
 			if( gridIdx == cudaGrids - 1 )
-				cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+				cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
 			const int sharedMemory = cudaBlockSize.x * sizeof( Real );
 			BiEllpackVectorProductCuda< Real, Index, StripSize, InVector, OutVector >
 			                                   <<< cudaGridSize, cudaBlockSize, sharedMemory >>>
@@ -1623,9 +1601,9 @@ public:
 			                                     gridIdx,
 			                                     matrix.warpSize );
 		}
-		Devices::Cuda::freeFromDevice( kernel_this );
-		Devices::Cuda::freeFromDevice( kernel_inVector );
-		Devices::Cuda::freeFromDevice( kernel_outVector );
+		Cuda::freeFromDevice( kernel_this );
+		Cuda::freeFromDevice( kernel_inVector );
+		Cuda::freeFromDevice( kernel_outVector );
 		TNL_CHECK_CUDA_DEVICE;
 #endif
     }
diff --git a/src/TNL/Matrices/COOMatrix.h b/src/TNL/Matrices/COOMatrix.h
index e5a4a0fd94634a627cfb0080e0700896f2c98f5c..c5ce76244dcb54b415e38ab57b1fa5e11cbeeab8 100644
--- a/src/TNL/Matrices/COOMatrix.h
+++ b/src/TNL/Matrices/COOMatrix.h
@@ -35,14 +35,13 @@ public:
 	typedef Index IndexType;
 	typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-	typedef COOMatrix< Real, Devices::Host, Index > HostType;
-	typedef COOMatrix< Real, Devices::Cuda, Index > CudaType;
 
-	COOMatrix();
-
-	static String getType();
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = COOMatrix< _Real, _Device, _Index >;
 
-	String getTypeVirtual() const;
+	COOMatrix();
 
 	bool setDimensions(const IndexType rows,
 			   	   	   const IndexType columns);
diff --git a/src/TNL/Matrices/COOMatrix_impl.h b/src/TNL/Matrices/COOMatrix_impl.h
index 090ccd1180349e41e02093ed740aef79b049b14c..bbdd36002ee4af0ca59da81815ed2527c0c0c828 100644
--- a/src/TNL/Matrices/COOMatrix_impl.h
+++ b/src/TNL/Matrices/COOMatrix_impl.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Matrices/COOMatrix.h>
 #include <TNL/Math.h>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 
 namespace TNL {
 namespace Matrices {
@@ -27,26 +27,6 @@ COOMatrix< Real, Device, Index >::COOMatrix()
 {
 };
 
-template< typename Real,
-	  	  typename Device,
-	  	  typename Index >
-String COOMatrix< Real, Device, Index >::getType()
-{
-	return String("COOMatrix< ") +
-  	 	   String(TNL::getType< Real>()) +
-		   String(", ") +
-		   Device::getDeviceType() +
-		   String(" >");
-}
-
-template< typename Real,
-	  	  typename Device,
-	  	  typename Index >
-String COOMatrix< Real, Device, Index >::getTypeVirtual() const
-{
-	return this->getType();
-}
-
 template< typename Real,
 		  typename Device,
 		  typename Index >
diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h
index 64e202a67716a9710d65505c6c81d7e6069ce9a9..485176d1d849b4be2c296a0f131f5ee2299f89f2 100644
--- a/src/TNL/Matrices/CSR.h
+++ b/src/TNL/Matrices/CSR.h
@@ -49,20 +49,19 @@ public:
    using IndexType = Index;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef CSR< Real, Devices::Host, Index > HostType;
-   typedef CSR< Real, Devices::Cuda, Index > CudaType;
    typedef Sparse< Real, Device, Index > BaseType;
    using MatrixRow = typename BaseType::MatrixRow;
    using ConstMatrixRow = typename BaseType::ConstMatrixRow;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = CSR< _Real, _Device, _Index >;
+
    enum SPMVCudaKernel { scalar, vector, hybrid };
 
    CSR();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    static String getSerializationType();
 
    virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 74ff682fdaaa94d0b32f8b02375f0b9678f21307..327d250028acca4349495bd663340f999f55024e 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -33,7 +33,7 @@ template< typename Real,
           typename Index >
 CSR< Real, Device, Index >::CSR()
 : spmvCudaKernel( hybrid ),
-  cudaWarpSize( 32 ), //Devices::Cuda::getWarpSize() )
+  cudaWarpSize( 32 ), //Cuda::getWarpSize() )
   hybridModeSplit( 4 )
 {
 };
@@ -41,31 +41,15 @@ CSR< Real, Device, Index >::CSR()
 template< typename Real,
           typename Device,
           typename Index >
-String CSR< Real, Device, Index >::getType()
+String CSR< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::CSR< ") +
-          String( TNL::getType< Real>() ) +
+          TNL::getType< Real>() +
           String( ", " ) +
-          Device :: getDeviceType() +
+          getType< Devices::Host >() +
           String( " >" );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String CSR< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String CSR< Real, Device, Index >::getSerializationType()
-{
-   return HostType::getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -104,7 +88,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng
    rowPtrs.bind( this->rowPointers.getData(), this->getRows() );
    rowPtrs = rowLengths;
    this->rowPointers.setElement( this->rows, 0 );
-   this->rowPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >();
+   this->rowPointers.template scan< Algorithms::ScanType::Exclusive >();
    this->maxRowLength = max( rowLengths );
 
    /****
@@ -161,16 +145,16 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con
 //       //  (gdb) p rowPointers.getElement(0)
 //       //    Attempt to take address of value not located in memory.
 //       IndexType resultHost ( 0 );
-//       IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
+//       IndexType* resultCuda = Cuda::passToDevice( resultHost );
 //       // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
 //       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
 //       TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
 //       delete []cols;
 //       delete []vals;
 //       std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
-//       resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
+//       resultHost = Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
 //       std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
-//       Devices::Cuda::freeFromDevice( resultCuda );
+//       Cuda::freeFromDevice( resultCuda );
 //       return resultHost;
 //   }
 }
@@ -729,7 +713,7 @@ void CSR< Real, Device, Index >::spmvCudaVectorized( const InVector& inVector,
                                                               const IndexType warpEnd,
                                                               const IndexType inWarpIdx ) const
 {
-   volatile Real* aux = Devices::Cuda::getSharedMemory< Real >();
+   volatile Real* aux = Cuda::getSharedMemory< Real >();
    for( IndexType row = warpStart; row < warpEnd; row++ )
    {
       aux[ threadIdx.x ] = 0.0;
@@ -769,7 +753,7 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector,
                                                              OutVector& outVector,
                                                              int gridIdx ) const
 {
-   IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    const IndexType warpStart = warpSize * ( globalIdx / warpSize );
    const IndexType warpEnd = min( warpStart + warpSize, this->getRows() );
    const IndexType inWarpIdx = globalIdx % warpSize;
@@ -780,7 +764,7 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector,
    /****
     * Hybrid mode
     */
-   const Index firstRow = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x;
+   const Index firstRow = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x;
    const IndexType lastRow = min( this->getRows(), firstRow + blockDim. x );
    const IndexType nonzerosPerRow = ( this->rowPointers[ lastRow ] - this->rowPointers[ firstRow ] ) /
                                     ( lastRow - firstRow );
@@ -831,38 +815,6 @@ class CSRDeviceDependentCode< Devices::Host >
 
 };
 
-#ifdef HAVE_MIC
-template<>
-class CSRDeviceDependentCode< Devices::MIC >
-{
-   public:
-
-      typedef Devices::MIC Device;
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const CSR< Real, Device, Index >& matrix,      
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         throw Exceptions::NotImplementedError("CSRDeviceDependentCode is not implemented for MIC.");
-      }
-  /*       const Index rows = matrix.getRows();
-         const tnlCSRMatrix< Real, Device, Index >* matrixPtr = &matrix;
-         const InVector* inVectorPtr = &inVector;
-         OutVector* outVectorPtr = &outVector;
-#ifdef HAVE_OPENMP
-#pragma omp parallel for firstprivate( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( Devices::Host::isOMPEnabled() )
-#endif         
-         for( Index row = 0; row < rows; row ++ )
-            ( *outVectorPtr )[ row ] = matrixPtr->rowVectorProduct( row, *inVectorPtr );
-      }*/
-
-};
-#endif
-
 #ifdef HAVE_CUDA
 template< typename Real,
           typename Index,
@@ -876,7 +828,7 @@ __global__ void CSRVectorProductCudaKernel( const CSR< Real, Devices::Cuda, Inde
 {
    typedef CSR< Real, Devices::Cuda, Index > Matrix;
    static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
-   const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( matrix->getCudaKernelType() == Matrix::scalar )
    {
       if( rowIdx < matrix->getRows() )
@@ -902,17 +854,17 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix,
 #ifdef HAVE_CUDA
    typedef CSR< Real, Devices::Cuda, Index > Matrix;
    typedef typename Matrix::IndexType IndexType;
-   Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-   InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-   OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
+   Matrix* kernel_this = Cuda::passToDevice( matrix );
+   InVector* kernel_inVector = Cuda::passToDevice( inVector );
+   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
    TNL_CHECK_CUDA_DEVICE;
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
    const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
    {
       if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
       const int sharedMemory = cudaBlockSize.x * sizeof( Real );
       if( matrix.getCudaWarpSize() == 32 )
          CSRVectorProductCudaKernel< Real, Index, InVector, OutVector, 32 >
@@ -959,9 +911,9 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix,
 
    }
    TNL_CHECK_CUDA_DEVICE;
-   Devices::Cuda::freeFromDevice( kernel_this );
-   Devices::Cuda::freeFromDevice( kernel_inVector );
-   Devices::Cuda::freeFromDevice( kernel_outVector );
+   Cuda::freeFromDevice( kernel_this );
+   Cuda::freeFromDevice( kernel_inVector );
+   Cuda::freeFromDevice( kernel_outVector );
    TNL_CHECK_CUDA_DEVICE;
 #endif
 }
diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h
index a6f06e79745d02f308604fd2bcba46fbeeed497c..a66e1283ab6b8df5e763de35e6a6ac2c14a70bf9 100644
--- a/src/TNL/Matrices/ChunkedEllpack.h
+++ b/src/TNL/Matrices/ChunkedEllpack.h
@@ -44,9 +44,6 @@ struct tnlChunkedEllpackSliceInfo
    IndexType chunkSize;
    IndexType firstRow;
    IndexType pointer;
-
-   static inline String getType()
-   { return String( "tnlChunkedEllpackSliceInfo" ); };
 };
 
 #ifdef HAVE_CUDA
@@ -78,17 +75,16 @@ public:
    typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo;
    typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef ChunkedEllpack< Real, Devices::Host, Index > HostType;
-   typedef ChunkedEllpack< Real, Devices::Cuda, Index > CudaType;
    typedef Sparse< Real, Device, Index > BaseType;
    typedef typename BaseType::MatrixRow MatrixRow;
    typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
 
-   ChunkedEllpack();
-
-   static String getType();
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = ChunkedEllpack< _Real, _Device, _Index >;
 
-   String getTypeVirtual() const;
+   ChunkedEllpack();
 
    static String getSerializationType();
 
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 6106ba2cdb0ba474ab9ab80b1b91fd837d33a033..48119c659163d9f57bb3feefe58210f48666b224 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -39,31 +39,15 @@ ChunkedEllpack< Real, Device, Index >::ChunkedEllpack()
 template< typename Real,
           typename Device,
           typename Index >
-String ChunkedEllpack< Real, Device, Index >::getType()
+String ChunkedEllpack< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::ChunkedEllpack< ") +
-          String( TNL::getType< Real >() ) +
+          getType< Real >() +
           String( ", " ) +
-          Device :: getDeviceType() +
+          getType< Device >() +
           String( " >" );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String ChunkedEllpack< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String ChunkedEllpack< Real, Device, Index >::getSerializationType()
-{
-   return getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -248,7 +232,7 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre
       this->rowPointers.setElement( 0, 0 );
       for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ )
          this->setSlice( rowLengths, sliceIndex, elementsToAllocation );
-      this->rowPointers.prefixSum();
+      this->rowPointers.scan();
    }
    
 //   std::cout << "\ngetRowLength after first if: " << std::endl;
@@ -314,7 +298,7 @@ template< typename Real,
 Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
 {
     ConstMatrixRow matrixRow = getRow( row );
-    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
+    return matrixRow.getNonZeroElementsCount( getType< Device >() );
     
 //    IndexType elementCount ( 0 );
 //    ConstMatrixRow matrixRow = this->getRow( row );
@@ -1139,7 +1123,7 @@ __device__ void ChunkedEllpack< Real, Device, Index >::computeSliceVectorProduct
 {
    static_assert( std::is_same < DeviceType, Devices::Cuda >::value, "" );
 
-   RealType* chunkProducts = Devices::Cuda::getSharedMemory< RealType >();
+   RealType* chunkProducts = Cuda::getSharedMemory< RealType >();
    ChunkedEllpackSliceInfo* sliceInfo = ( ChunkedEllpackSliceInfo* ) & chunkProducts[ blockDim.x ];
 
    if( threadIdx.x == 0 )
@@ -1349,7 +1333,7 @@ void ChunkedEllpack< Real, Device, Index >::printStructure( std::ostream& str,
                                                                      const String& name ) const
 {
    const IndexType numberOfSlices = this->getNumberOfSlices();
-   str << "Matrix type: " << getType() << std::endl
+   str << "Matrix type: " << getType( *this ) << std::endl
        << "Marix name: " << name << std::endl
        << "Rows: " << this->getRows() << std::endl
        << "Columns: " << this->getColumns() << std::endl
@@ -1419,7 +1403,7 @@ __global__ void ChunkedEllpackVectorProductCudaKernel( const ChunkedEllpack< Rea
                                                                 OutVector* outVector,
                                                                 int gridIdx )
 {
-   const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x;
+   const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() + blockIdx.x;
    if( sliceIdx < matrix->getNumberOfSlices() )
       matrix->computeSliceVectorProduct( inVector, outVector, sliceIdx );
 
@@ -1472,19 +1456,19 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda >
             typedef ChunkedEllpack< Real, Devices::Cuda, Index > Matrix;
             typedef Index IndexType;
             typedef Real RealType;
-            Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-            InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-            OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
+            Matrix* kernel_this = Cuda::passToDevice( matrix );
+            InVector* kernel_inVector = Cuda::passToDevice( inVector );
+            OutVector* kernel_outVector = Cuda::passToDevice( outVector );
             dim3 cudaBlockSize( matrix.getNumberOfChunksInSlice() ),
-                 cudaGridSize( Devices::Cuda::getMaxGridSize() );
+                 cudaGridSize( Cuda::getMaxGridSize() );
             const IndexType cudaBlocks = matrix.getNumberOfSlices();
-            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
             const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType ) +
                                            sizeof( tnlChunkedEllpackSliceInfo< IndexType > );
             for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
             {
                if( gridIdx == cudaGrids - 1 )
-                  cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                  cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
                ChunkedEllpackVectorProductCudaKernel< Real, Index, InVector, OutVector >
                                                              <<< cudaGridSize, cudaBlockSize, sharedMemory  >>>
                                                              ( kernel_this,
@@ -1492,9 +1476,9 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda >
                                                                kernel_outVector,
                                                                gridIdx );
             }
-            Devices::Cuda::freeFromDevice( kernel_this );
-            Devices::Cuda::freeFromDevice( kernel_inVector );
-            Devices::Cuda::freeFromDevice( kernel_outVector );
+            Cuda::freeFromDevice( kernel_this );
+            Cuda::freeFromDevice( kernel_inVector );
+            Cuda::freeFromDevice( kernel_outVector );
             TNL_CHECK_CUDA_DEVICE;
          #endif
       }
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 8c21e33b0f02300c7223ce420125f91bc064699d..c469927234cd835bef7bcfe36599a47cb843b6cc 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -41,18 +41,16 @@ public:
    typedef Index IndexType;
    typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Dense< Real, Devices::Host, Index > HostType;
-   typedef Dense< Real, Devices::Cuda, Index > CudaType;
    typedef Matrix< Real, Device, Index > BaseType;
    typedef DenseRow< Real, Index > MatrixRow;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Dense< _Real, _Device, _Index >;
 
    Dense();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    static String getSerializationType();
 
    virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h
index 5b55dbc292b76e63d28c6039ff6f552f89a05183..246bd09edb459e6df9749af9d1589f508c2c5806 100644
--- a/src/TNL/Matrices/Dense_impl.h
+++ b/src/TNL/Matrices/Dense_impl.h
@@ -24,31 +24,15 @@ Dense< Real, Device, Index >::Dense()
 {
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getType()
-{
-   return String( "Matrices::Dense< " ) +
-          String( TNL::getType< RealType >() ) + ", " +
-          String( Device :: getDeviceType() ) + ", " +
-          String( TNL::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
 String Dense< Real, Device, Index >::getSerializationType()
 {
-   return getType();
+   return String( "Matrices::Dense< " ) +
+          getType< RealType >() + ", " +
+          getType< Device >() + ", " +
+          getType< IndexType >() + " >";
 }
 
 template< typename Real,
@@ -602,20 +586,20 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
       const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim );
       cudaBlockSize.x = cudaBlockColumns;
       cudaBlockSize.y = cudaBlockRows;
-      const IndexType rowGrids = roundUpDivision( rowTiles, Devices::Cuda::getMaxGridSize() );
-      const IndexType columnGrids = roundUpDivision( columnTiles, Devices::Cuda::getMaxGridSize() );
+      const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() );
+      const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() );
 
       for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ )
          for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ )
          {
-            cudaGridSize.x = cudaGridSize.y = Devices::Cuda::getMaxGridSize();
+            cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize();
             if( gridIdx_x == columnGrids - 1 )
-               cudaGridSize.x = columnTiles % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.x = columnTiles % Cuda::getMaxGridSize();
             if( gridIdx_y == rowGrids - 1 )
-               cudaGridSize.y = rowTiles % Devices::Cuda::getMaxGridSize();
-            Dense* this_kernel = Devices::Cuda::passToDevice( *this );
-            Matrix1* matrix1_kernel = Devices::Cuda::passToDevice( matrix1 );
-            Matrix2* matrix2_kernel = Devices::Cuda::passToDevice( matrix2 );
+               cudaGridSize.y = rowTiles % Cuda::getMaxGridSize();
+            Dense* this_kernel = Cuda::passToDevice( *this );
+            Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 );
+            Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 );
             DenseMatrixProductKernel< Real,
                                                Index,
                                                Matrix1,
@@ -632,9 +616,9 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
                                                matrix2Multiplicator,
                                                gridIdx_x,
                                                gridIdx_y );
-            Devices::Cuda::freeFromDevice( this_kernel );
-            Devices::Cuda::freeFromDevice( matrix1_kernel );
-            Devices::Cuda::freeFromDevice( matrix2_kernel );
+            Cuda::freeFromDevice( this_kernel );
+            Cuda::freeFromDevice( matrix1_kernel );
+            Cuda::freeFromDevice( matrix2_kernel );
          }
 #endif
    }
@@ -685,7 +669,7 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind
         rowBlock < tileDim;
         rowBlock += tileRowBlockSize )
    {
-      tile[ Devices::Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+      tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
                inputMatrix->getElementFast( readColumnPosition,
                                             readRowPosition + rowBlock );
    }
@@ -704,7 +688,7 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind
    {
       resultMatrix->setElementFast( writeColumnPosition,
                                     writeRowPosition + rowBlock,
-                                    matrixMultiplicator * tile[ Devices::Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+                                    matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
 
    }
 
@@ -757,7 +741,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
            rowBlock += tileRowBlockSize )
       {
          if( readRowPosition + rowBlock < rows )
-            tile[ Devices::Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+            tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
                inputMatrix->getElementFast( readColumnPosition,
                                             readRowPosition + rowBlock );
       }
@@ -781,7 +765,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
          if( writeRowPosition + rowBlock < columns )
             resultMatrix->setElementFast( writeColumnPosition,
                                           writeRowPosition + rowBlock,
-                                          matrixMultiplicator * tile[ Devices::Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+                                          matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
       }
    }
 
@@ -825,21 +809,21 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
       const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim );
       cudaBlockSize.x = cudaBlockColumns;
       cudaBlockSize.y = cudaBlockRows;
-      const IndexType rowGrids = roundUpDivision( rowTiles, Devices::Cuda::getMaxGridSize() );
-      const IndexType columnGrids = roundUpDivision( columnTiles, Devices::Cuda::getMaxGridSize() );
-      const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Devices::Cuda::getNumberOfSharedMemoryBanks();
+      const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() );
+      const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() );
+      const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks();
 
-      Dense* this_device = Devices::Cuda::passToDevice( *this );
-      Matrix* matrix_device = Devices::Cuda::passToDevice( matrix );
+      Dense* this_device = Cuda::passToDevice( *this );
+      Matrix* matrix_device = Cuda::passToDevice( matrix );
 
       for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ )
          for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ )
          {
-            cudaGridSize.x = cudaGridSize.y = Devices::Cuda::getMaxGridSize();
+            cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize();
             if( gridIdx_x == columnGrids - 1)
-               cudaGridSize.x = columnTiles % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.x = columnTiles % Cuda::getMaxGridSize();
             if( gridIdx_y == rowGrids - 1 )
-               cudaGridSize.y = rowTiles % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.y = rowTiles % Cuda::getMaxGridSize();
             if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) &&
                 ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) )
             {
@@ -875,8 +859,8 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
             }
             TNL_CHECK_CUDA_DEVICE;
          }
-      Devices::Cuda::freeFromDevice( this_device );
-      Devices::Cuda::freeFromDevice( matrix_device );
+      Cuda::freeFromDevice( this_device );
+      Cuda::freeFromDevice( matrix_device );
 #endif
    }
 }
diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h
index 72586dbb3814b95d08cf746c4e32d5704185db2f..76b6ea8c1d5173ee8d0cd85421d919085fe590e5 100644
--- a/src/TNL/Matrices/DistributedMatrix.h
+++ b/src/TNL/Matrices/DistributedMatrix.h
@@ -54,14 +54,17 @@ public:
    using CommunicatorType = Communicator;
    using LocalRangeType = Containers::Subrange< typename Matrix::IndexType >;
 
-   using HostType = DistributedMatrix< typename Matrix::HostType, Communicator >;
-   using CudaType = DistributedMatrix< typename Matrix::CudaType, Communicator >;
-
    using CompressedRowLengthsVector = Containers::DistributedVector< IndexType, DeviceType, IndexType, CommunicatorType >;
 
    using MatrixRow = Matrices::SparseRow< RealType, IndexType >;
    using ConstMatrixRow = Matrices::SparseRow< std::add_const_t< RealType >, std::add_const_t< IndexType > >;
 
+   template< typename _Real = RealType,
+             typename _Device = DeviceType,
+             typename _Index = IndexType,
+             typename _Communicator = Communicator >
+   using Self = DistributedMatrix< typename MatrixType::template Self< _Real, _Device, _Index >, _Communicator >;
+
    DistributedMatrix() = default;
 
    DistributedMatrix( DistributedMatrix& ) = default;
@@ -80,13 +83,6 @@ public:
    const Matrix& getLocalMatrix() const;
 
 
-   static String getType();
-
-   virtual String getTypeVirtual() const;
-
-   // TODO: no getSerializationType method until there is support for serialization
-
-
    /*
     * Some common Matrix methods follow below.
     */
diff --git a/src/TNL/Matrices/DistributedMatrix_impl.h b/src/TNL/Matrices/DistributedMatrix_impl.h
index 33eeef26458cf1c69a744578400f83dc23782466..c1a13a713391f4231b41191fbedb5aa1cb4050c7 100644
--- a/src/TNL/Matrices/DistributedMatrix_impl.h
+++ b/src/TNL/Matrices/DistributedMatrix_impl.h
@@ -71,28 +71,6 @@ getLocalMatrix() const
 }
 
 
-template< typename Matrix,
-          typename Communicator >
-String
-DistributedMatrix< Matrix, Communicator >::
-getType()
-{
-   return String( "Matrices::DistributedMatrix< " ) +
-          Matrix::getType() + ", " +
-          // TODO: communicators don't have a getType method
-          "<Communicator>" + " >";
-}
-
-template< typename Matrix,
-          typename Communicator >
-String
-DistributedMatrix< Matrix, Communicator >::
-getTypeVirtual() const
-{
-   return getType();
-}
-
-
 /*
  * Some common Matrix methods follow below.
  */
diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index 0886d686c3947c5d57d022e46beb21cb9ca01f3d..b2abd13c537dc181de638caec4b6adf06755b2bf 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -27,7 +27,7 @@
 // operations
 #include <type_traits>  // std::add_const
 #include <TNL/Atomic.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Pointers/DevicePointer.h>
 
 namespace TNL {
@@ -105,13 +105,13 @@ public:
             local_span[1].fetch_min( i );
       };
 
-      ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(),
-                                       kernel,
-                                       &localMatrixPointer.template getData< DeviceType >(),
-                                       span_starts.getData(),
-                                       span_ends.getData(),
-                                       local_span.getData()
-                                    );
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(),
+                                                   kernel,
+                                                   &localMatrixPointer.template getData< DeviceType >(),
+                                                   span_starts.getData(),
+                                                   span_ends.getData(),
+                                                   local_span.getData()
+                                                );
 
       // set the local-only span (optimization for banded matrices)
       localOnlySpan.first = local_span.getElement( 0 );
@@ -192,8 +192,8 @@ public:
          {
             outVectorView[ i ] = localMatrix->rowVectorProduct( i, globalBufferView );
          };
-         ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), kernel,
-                                          &localMatrixPointer.template getData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), kernel,
+                                                      &localMatrixPointer.template getData< DeviceType >() );
       }
       // optimization for banded matrices
       else {
@@ -206,8 +206,8 @@ public:
          {
             outVectorView[ i ] = localMatrix->rowVectorProduct( i, inView );
          };
-         ParallelFor< DeviceType >::exec( localOnlySpan.first, localOnlySpan.second, kernel1,
-                                          &localMatrixPointer.template getData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( localOnlySpan.first, localOnlySpan.second, kernel1,
+                                                      &localMatrixPointer.template getData< DeviceType >() );
 
          // wait for all communications to finish
          CommunicatorType::WaitAll( &commRequests[0], commRequests.size() );
@@ -217,10 +217,10 @@ public:
          {
             outVectorView[ i ] = localMatrix->rowVectorProduct( i, globalBufferView );
          };
-         ParallelFor< DeviceType >::exec( (IndexType) 0, localOnlySpan.first, kernel2,
-                                          &localMatrixPointer.template getData< DeviceType >() );
-         ParallelFor< DeviceType >::exec( localOnlySpan.second, localMatrix.getRows(), kernel2,
-                                          &localMatrixPointer.template getData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localOnlySpan.first, kernel2,
+                                                      &localMatrixPointer.template getData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( localOnlySpan.second, localMatrix.getRows(), kernel2,
+                                                      &localMatrixPointer.template getData< DeviceType >() );
       }
    }
 
diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h
index e2479fd54349a93424a1d2e67a6fc42a1d0a6eff..6536f5f6ca6ffa7869851e2ad0883c51de83ed28 100644
--- a/src/TNL/Matrices/Ellpack.h
+++ b/src/TNL/Matrices/Ellpack.h
@@ -39,17 +39,16 @@ public:
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef Ellpack< Real, Devices::Host, Index > HostType;
-   typedef Ellpack< Real, Devices::Cuda, Index > CudaType;
    typedef Sparse< Real, Device, Index > BaseType;
    typedef typename BaseType::MatrixRow MatrixRow;
    typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
 
-   Ellpack();
-
-   static String getType();
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Ellpack< _Real, _Device, _Index >;
 
-   String getTypeVirtual() const;
+   Ellpack();
 
    static String getSerializationType();
 
diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/EllpackSymmetric.h
index 323772551aac67668cca60ea69fa6651f24dbe1c..d92fc77ee25f203adad1470de46d17552047a290 100644
--- a/src/TNL/Matrices/EllpackSymmetric.h
+++ b/src/TNL/Matrices/EllpackSymmetric.h
@@ -31,16 +31,14 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef EllpackSymmetric< Real, Devices::Host, Index > HostType;
-   typedef EllpackSymmetric< Real, Devices::Cuda, Index > CudaType;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = EllpackSymmetric< _Real, _Device, _Index >;
 
    EllpackSymmetric();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    void setDimensions( const IndexType rows,
                        const IndexType columns );
 
diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/EllpackSymmetricGraph.h
index 4c56a8716b5b9ba1612a52750063a42120223682..03e3298557171cd2faaed57b698819af0c87b7d2 100644
--- a/src/TNL/Matrices/EllpackSymmetricGraph.h
+++ b/src/TNL/Matrices/EllpackSymmetricGraph.h
@@ -31,16 +31,14 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index >
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef EllpackSymmetricGraph< Real, Devices::Host, Index > HostType;
-   typedef EllpackSymmetricGraph< Real, Devices::Cuda, Index > CudaType;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = EllpackSymmetricGraph< _Real, _Device, _Index >;
 
    EllpackSymmetricGraph();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    void setDimensions( const IndexType rows,
                        const IndexType columns );
 
diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h
index 6304d5f9dbc4753b3f545919c8039b182928251a..b949292c5f1664562525a4ead8ca17b2ad9f343b 100644
--- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h
+++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h
@@ -42,26 +42,6 @@ Index EllpackSymmetricGraph< Real, Device, Index >::getAlignedRows() const
     return this->alignedRows;
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String EllpackSymmetricGraph< Real, Device, Index > :: getType()
-{
-   return String( "EllpackSymmetricGraph< ") +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          Device::getDeviceType() +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String EllpackSymmetricGraph< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -74,7 +54,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp
    this->rows = rows;
    this->columns = columns;   
    if( std::is_same< DeviceType, Devices::Cuda >::value )
-      this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
+      this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() );
    else this->alignedRows = rows;
    if( this->rowLengths != 0 )
    allocateElements();
@@ -937,7 +917,7 @@ void EllpackSymmetricGraphVectorProductCuda( const EllpackSymmetricGraph< Real,
                                              const int gridIdx,
                                              const int color )
 {
-   int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    matrix->spmvCuda( *inVector, *outVector, globalIdx, color );
 }
 #endif
@@ -986,19 +966,19 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
           typedef EllpackSymmetricGraph< Real, Devices::Cuda, Index > Matrix;
           typedef typename Matrix::IndexType IndexType;
-          Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-          InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-          OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-          dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+          Matrix* kernel_this = Cuda::passToDevice( matrix );
+          InVector* kernel_inVector = Cuda::passToDevice( inVector );
+          OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+          dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
           for( IndexType color = 0; color < matrix.getNumberOfColors(); color++ )
           {
               IndexType rows = matrix.getRowsOfColor( color );
               const IndexType cudaBlocks = roundUpDivision( rows, cudaBlockSize.x );
-              const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+              const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
               for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
               {
                   if( gridIdx == cudaGrids - 1 )
-                      cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                      cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
                   EllpackSymmetricGraphVectorProductCuda< Real, Index, InVector, OutVector >
                                                       <<< cudaGridSize, cudaBlockSize >>>
                                                         ( kernel_this,
@@ -1009,9 +989,9 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >
               }
           }
 
-          Devices::Cuda::freeFromDevice( kernel_this );
-          Devices::Cuda::freeFromDevice( kernel_inVector );
-          Devices::Cuda::freeFromDevice( kernel_outVector );
+          Cuda::freeFromDevice( kernel_this );
+          Cuda::freeFromDevice( kernel_inVector );
+          Cuda::freeFromDevice( kernel_outVector );
           TNL_CHECK_CUDA_DEVICE;
 #endif
       }
diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h
index 7207afc568fffb86687edd45cb183170a57a1775..90369f77af0f0085b140934c27fe3fe5a2d8f015 100644
--- a/src/TNL/Matrices/EllpackSymmetric_impl.h
+++ b/src/TNL/Matrices/EllpackSymmetric_impl.h
@@ -26,26 +26,6 @@ EllpackSymmetric< Real, Device, Index > :: EllpackSymmetric()
 {
 };
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String EllpackSymmetric< Real, Device, Index > :: getType()
-{
-   return String( "EllpackSymmetric< ") +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          Device::getDeviceType() +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String EllpackSymmetric< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -58,7 +38,7 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row
    this->rows = rows;
    this->columns = columns;   
    if( std::is_same< DeviceType, Devices::Cuda >::value )
-      this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
+      this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() );
    else this->alignedRows = rows;
    if( this->rowLengths != 0 )
       allocateElements();
@@ -728,7 +708,7 @@ void EllpackSymmetricVectorProductCuda( const EllpackSymmetric< Real, Devices::C
                                            OutVector* outVector,
                                            const int gridIdx )
 {
-    int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+    int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
     if( globalIdx >= matrix->getRows() )
         return;
     matrix->spmvCuda( *inVector, *outVector, globalIdx );
@@ -780,16 +760,16 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
           typedef EllpackSymmetric< Real, Devices::Cuda, Index > Matrix;
           typedef typename Matrix::IndexType IndexType;
-          Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-          InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-          OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-          dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+          Matrix* kernel_this = Cuda::passToDevice( matrix );
+          InVector* kernel_inVector = Cuda::passToDevice( inVector );
+          OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+          dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
           const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-          const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+          const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
           for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
           {
               if( gridIdx == cudaGrids - 1 )
-                  cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                  cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
               const int sharedMemory = cudaBlockSize.x * sizeof( Real );
               EllpackSymmetricVectorProductCuda< Real, Index, InVector, OutVector >
                                                 <<< cudaGridSize, cudaBlockSize, sharedMemory >>>
@@ -798,9 +778,9 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda >
                                                     kernel_outVector,
                                                     gridIdx );
           }
-          Devices::Cuda::freeFromDevice( kernel_this );
-          Devices::Cuda::freeFromDevice( kernel_inVector );
-          Devices::Cuda::freeFromDevice( kernel_outVector );
+          Cuda::freeFromDevice( kernel_this );
+          Cuda::freeFromDevice( kernel_inVector );
+          Cuda::freeFromDevice( kernel_outVector );
           TNL_CHECK_CUDA_DEVICE;
 #endif
       }
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index b4e453793fd2bc7d3c2e2f45182d54faf667c4fa..5ac812cf2101e7f13bafbfd871ac168429be49cd 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -29,33 +29,17 @@ Ellpack< Real, Device, Index > :: Ellpack()
 template< typename Real,
           typename Device,
           typename Index >
-String Ellpack< Real, Device, Index > :: getType()
+String Ellpack< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::Ellpack< ") +
-          String( TNL::getType< Real >() ) +
+          getType< Real >() +
           String( ", " ) +
-          Device :: getDeviceType() +
+          getType< Device >() +
           String( ", " ) +
-          String( TNL::getType< Index >() ) +
+          getType< Index >() +
           String( " >" );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String Ellpack< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Ellpack< Real, Device, Index >::getSerializationType()
-{
-   return getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -76,7 +60,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
    this->rows = rows;
    this->columns = columns;
    if( std::is_same< Device, Devices::Cuda >::value )
-      this->alignedRows = roundToMultiple( rows, Devices::Cuda::getWarpSize() );
+      this->alignedRows = roundToMultiple( rows, Cuda::getWarpSize() );
    else this->alignedRows = rows;
    if( this->rowLengths != 0 )
       allocateElements();
@@ -130,7 +114,7 @@ template< typename Real,
 Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
 {
     ConstMatrixRow matrixRow = getRow( row );
-    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
+    return matrixRow.getNonZeroElementsCount( getType< Device >() );
 }
 
 template< typename Real,
@@ -144,7 +128,7 @@ void Ellpack< Real, Device, Index >::setLike( const Ellpack< Real2, Device2, Ind
    Sparse< Real, Device, Index >::setLike( matrix );
    this->rowLengths = matrix.rowLengths;
    if( std::is_same< Device, Devices::Cuda >::value )
-      this->alignedRows = roundToMultiple( this->getRows(), Devices::Cuda::getWarpSize() );
+      this->alignedRows = roundToMultiple( this->getRows(), Cuda::getWarpSize() );
    else this->alignedRows = this->getRows();
 }
 
@@ -664,8 +648,8 @@ Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2
 
    // host -> cuda
    if( std::is_same< Device, Devices::Cuda >::value ) {
-      typename ValuesVector::HostType tmpValues;
-      typename ColumnIndexesVector::HostType tmpColumnIndexes;
+      typename ValuesVector::template Self< typename ValuesVector::ValueType, Devices::Sequential > tmpValues;
+      typename ColumnIndexesVector::template Self< typename ColumnIndexesVector::ValueType, Devices::Sequential > tmpColumnIndexes;
       tmpValues.setLike( this->values );
       tmpColumnIndexes.setLike( this->columnIndexes );
 
@@ -852,7 +836,7 @@ __global__ void EllpackVectorProductCudaKernel(
    Real multiplicator,
    const Index gridIdx )
 {
-   const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( rowIdx >= rows )
       return;
    Index i = rowIdx;
@@ -918,16 +902,16 @@ class EllpackDeviceDependentCode< Devices::Cuda >
          #ifdef HAVE_CUDA
             typedef Ellpack< Real, Device, Index > Matrix;
             typedef typename Matrix::IndexType IndexType;
-            //Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-            //InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-            //OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-            dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+            //Matrix* kernel_this = Cuda::passToDevice( matrix );
+            //InVector* kernel_inVector = Cuda::passToDevice( inVector );
+            //OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+            dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
             const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
             for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
             {
                if( gridIdx == cudaGrids - 1 )
-                  cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                  cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
                EllpackVectorProductCudaKernel
                < Real, Index >
                 <<< cudaGridSize, cudaBlockSize >>>
@@ -944,9 +928,9 @@ class EllpackDeviceDependentCode< Devices::Cuda >
                   gridIdx );
                TNL_CHECK_CUDA_DEVICE;
             }
-            //Devices::Cuda::freeFromDevice( kernel_this );
-            //Devices::Cuda::freeFromDevice( kernel_inVector );
-            //Devices::Cuda::freeFromDevice( kernel_outVector );
+            //Cuda::freeFromDevice( kernel_this );
+            //Cuda::freeFromDevice( kernel_inVector );
+            //Cuda::freeFromDevice( kernel_outVector );
             TNL_CHECK_CUDA_DEVICE;
             cudaDeviceSynchronize();
          #endif
diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h
index 07991a573662c7380d6fd2814b0a20db30e7dca8..354b0a9e19d89ddb3cb457ce578752c8f1f513b0 100644
--- a/src/TNL/Matrices/MatrixOperations.h
+++ b/src/TNL/Matrices/MatrixOperations.h
@@ -21,6 +21,8 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Math.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/Cuda/SharedMemory.h>
 
 namespace TNL {
 namespace Matrices {
@@ -248,7 +250,7 @@ GemvCudaKernel( const IndexType m,
    IndexType elementIdx = blockIdx.x * blockDim.x + threadIdx.x;
    const IndexType gridSize = blockDim.x * gridDim.x;
 
-   RealType* shx = Devices::Cuda::getSharedMemory< RealType >();
+   RealType* shx = Cuda::getSharedMemory< RealType >();
 
    if( threadIdx.x < n )
       shx[ threadIdx.x ] = alpha * x[ threadIdx.x ];
@@ -341,13 +343,13 @@ public:
       // TODO: use static storage, e.g. from the CudaReductionBuffer, to avoid frequent reallocations
       Containers::Vector< RealType, Devices::Cuda, IndexType > xDevice;
       xDevice.setSize( n );
-      Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n );
+      Algorithms::MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n );
 
       // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors
-      const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+      const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() );
       dim3 blockSize, gridSize;
       blockSize.x = 256;
-      gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) );
+      gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( m, blockSize.x ) );
 
       GemvCudaKernel<<< gridSize, blockSize, n * sizeof( RealType ) >>>(
             m, n,
@@ -401,9 +403,9 @@ public:
          blockSize.x /= 2;
 
       // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors
-      const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-      gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) );
-      gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y );
+      const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() );
+      gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( m, blockSize.x ) );
+      gridSize.y = Cuda::getNumberOfBlocks( n, blockSize.y );
 
       GeamCudaKernel<<< gridSize, blockSize >>>(
             m, n,
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index eacf8911a149faed530f3fd6138051dfb2795071..418e6f5b3eda29a659c4487dfaf34d88c12fea1d 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -425,11 +425,11 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
                             bool verbose,
                             bool symReader )
    {
-      typedef typename Matrix::HostType HostMatrixType;
-      typedef typename HostMatrixType::CompressedRowLengthsVector CompressedRowLengthsVector;
+      using HostMatrixType = typename Matrix::template Self< typename Matrix::RealType, Devices::Sequential >;
+      using CompressedRowLengthsVector = typename HostMatrixType::CompressedRowLengthsVector;
 
       HostMatrixType hostMatrix;
-      typename Matrix::CompressedRowLengthsVector rowLengths;
+      CompressedRowLengthsVector rowLengths;
       return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
 
       matrix = hostMatrix;
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h
index 7472760c23ec6d8df8a4295457cd6773e4ac80df..33c4d2e654cb32f9ba56516a1678b73d17ee3b96 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix_impl.h
@@ -12,6 +12,9 @@
 
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Cuda/MemoryHelpers.h>
+#include <TNL/Cuda/SharedMemory.h>
 
 namespace TNL {
 namespace Matrices {
@@ -240,7 +243,7 @@ __global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
                                                   int gridIdx )
 {
    static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
-   const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( rowIdx < matrix->getRows() )
       ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
 }
@@ -255,16 +258,16 @@ void MatrixVectorProductCuda( const Matrix& matrix,
 {
 #ifdef HAVE_CUDA
    typedef typename Matrix::IndexType IndexType;
-   Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-   InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-   OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+   Matrix* kernel_this = Cuda::passToDevice( matrix );
+   InVector* kernel_inVector = Cuda::passToDevice( inVector );
+   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
    const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
    {
       if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
       MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
                                      ( kernel_this,
                                        kernel_inVector,
@@ -272,9 +275,9 @@ void MatrixVectorProductCuda( const Matrix& matrix,
                                        gridIdx );
       TNL_CHECK_CUDA_DEVICE;
    }
-   Devices::Cuda::freeFromDevice( kernel_this );
-   Devices::Cuda::freeFromDevice( kernel_inVector );
-   Devices::Cuda::freeFromDevice( kernel_outVector );
+   Cuda::freeFromDevice( kernel_this );
+   Cuda::freeFromDevice( kernel_inVector );
+   Cuda::freeFromDevice( kernel_outVector );
    TNL_CHECK_CUDA_DEVICE;
 #endif
 }
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 0496a25a343f336690b7b0d00e699e209aabcd28..1ee6a25e9af4fbf8d8f28461e6658305e2b0151f 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -38,18 +38,16 @@ public:
    typedef Index IndexType;
    typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Multidiagonal< Real, Devices::Host, Index > HostType;
-   typedef Multidiagonal< Real, Devices::Cuda, Index > CudaType;
    typedef Matrix< Real, Device, Index > BaseType;
    typedef MultidiagonalRow< Real, Index > MatrixRow;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Multidiagonal< _Real, _Device, _Index >;
 
    Multidiagonal();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    static String getSerializationType();
 
    virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h
index 065e7780dcde13ea02404de3a5a42447ca4d4ae4..ff1ac384a3a1a95a170f491de8a56dae09651b3c 100644
--- a/src/TNL/Matrices/Multidiagonal_impl.h
+++ b/src/TNL/Matrices/Multidiagonal_impl.h
@@ -31,31 +31,15 @@ Multidiagonal< Real, Device, Index > :: Multidiagonal()
 template< typename Real,
           typename Device,
           typename Index >
-String Multidiagonal< Real, Device, Index > :: getType()
+String Multidiagonal< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::Multidiagonal< ") +
-          String( TNL::getType< Real >() ) +
+          getType< Real >() +
           String( ", " ) +
-          Device :: getDeviceType() +
+          getType< Device >() +
           String( " >" );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String Multidiagonal< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Multidiagonal< Real, Device, Index >::getSerializationType()
-{
-   return getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h
index 8503f6180a27b7e3f31c2555949611da70fc9b3a..5051fc21868b13a5644ebbdb190371bc50c77224 100644
--- a/src/TNL/Matrices/SlicedEllpack.h
+++ b/src/TNL/Matrices/SlicedEllpack.h
@@ -68,19 +68,18 @@ public:
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef SlicedEllpack< Real, Devices::Host, Index, SliceSize > HostType;
-   typedef SlicedEllpack< Real, Devices::Cuda, Index, SliceSize > CudaType;
    typedef Sparse< Real, Device, Index > BaseType;
    typedef typename BaseType::MatrixRow MatrixRow;
    typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index,
+             int _SliceSize = SliceSize >
+   using Self = SlicedEllpack< _Real, _Device, _Index, _SliceSize >;
 
    SlicedEllpack();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    static String getSerializationType();
 
    virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/SlicedEllpackSymmetric.h
index 9e7694de47649259e597f13582c6b77fbc6a47ae..835eccf83d43292f75c2e918c7e7ccd3b5d37aaa 100644
--- a/src/TNL/Matrices/SlicedEllpackSymmetric.h
+++ b/src/TNL/Matrices/SlicedEllpackSymmetric.h
@@ -49,16 +49,15 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef SlicedEllpackSymmetric< Real, Devices::Host, Index > HostType;
-   typedef SlicedEllpackSymmetric< Real, Devices::Cuda, Index > CudaType;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index,
+             int _SliceSize = SliceSize >
+   using Self = SlicedEllpackSymmetric< _Real, _Device, _Index, _SliceSize >;
 
    SlicedEllpackSymmetric();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    void setDimensions( const IndexType rows,
                        const IndexType columns );
 
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h
index 12019b79d6f2acd5c95c53d2dda4abc7da655a61..5fed4082b885093cf7e1a67b1025e73179895471 100644
--- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h
+++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h
@@ -49,16 +49,15 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef SlicedEllpackSymmetricGraph< Real, Devices::Host, Index > HostType;
-   typedef SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index > CudaType;
 
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index,
+             int _SliceSize = SliceSize >
+   using Self = SlicedEllpackSymmetricGraph< _Real, _Device, _Index, _SliceSize >;
 
    SlicedEllpackSymmetricGraph();
 
-   static String getType();
-
-   String getTypeVirtual() const;
-
    void setDimensions( const IndexType rows,
                        const IndexType columns );
 
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h
index 866211d53b6af573fe4196a8b4767508ead51858..bfe73f231092a0e4ea90c3011b823c6ab8c17d95 100644
--- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h
+++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h
@@ -25,28 +25,6 @@ template< typename Real,
 SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::SlicedEllpackSymmetricGraph()
 : rearranged( false )
 {
-};
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          int SliceSize >
-String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getType()
-{
-   return String( "SlicedEllpackSymmetricGraph< ") +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          Device::getDeviceType() +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          int SliceSize >
-String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getTypeVirtual() const
-{
-   return this->getType();
 }
 
 template< typename Real,
@@ -1117,7 +1095,7 @@ __global__ void SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_Cuda
                                                                                         typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVector rowLengths,
                                                                                         int gridIdx )
 {
-   const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+   const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
    matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx );
 }
 #endif
@@ -1174,7 +1152,7 @@ void SlicedEllpackSymmetricGraphVectorProductCuda( const SlicedEllpackSymmetricG
                                                    const int color,
                                                    const int sliceOffset )
 {
-    int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x + sliceOffset;
+    int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x + sliceOffset;
     matrix->smvCuda( *inVector, *outVector, globalIdx, color );
 }
 #endif
@@ -1235,21 +1213,21 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
          typedef SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize > Matrix;
          typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector;
-         Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix );
+         Matrix* kernel_matrix = Cuda::passToDevice( matrix );
          const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize );
-         dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+         dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
          const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x );
-         const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+         const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
          for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
          {
             if( gridIdx == cudaGrids - 1 )
-               cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
             SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>>
                                                                              ( kernel_matrix,
                                                                                rowLengths,
                                                                                gridIdx );
          }
-         Devices::Cuda::freeFromDevice( kernel_matrix );
+         Cuda::freeFromDevice( kernel_matrix );
          TNL_CHECK_CUDA_DEVICE;
 #endif
       }
@@ -1267,10 +1245,10 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
          typedef SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize > Matrix;
          typedef typename Matrix::IndexType IndexType;
-         Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-         InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-         OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-         dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+         Matrix* kernel_this = Cuda::passToDevice( matrix );
+         InVector* kernel_inVector = Cuda::passToDevice( inVector );
+         OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+         dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
          for( IndexType color = 0; color < matrix.getNumberOfColors(); color++ )
          {
             IndexType offset = matrix.colorPointers.getElement( color ); //can be computed in kernel
@@ -1280,11 +1258,11 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >
             //IndexType rows = matrix.colorPointers.getElement( color + 1 ) - matrix.colorPointers.getElement( color ) + inSliceIdx;
             // TODO: rows id undefined
             /*const IndexType cudaBlocks = roundUpDivision( rows, cudaBlockSize.x );
-            const IndexType cudaGrids = rondUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize );
+            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
             for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
             {
                if( gridIdx == cudaGrids - 1 )
-                  cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                  cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
                // TODO: this cannot be used here and i is undefined
                //IndexType offset = this->colorPointers[ i ];
                IndexType inSliceIdx = offset % SliceSize;
@@ -1299,9 +1277,9 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >
                                                              sliceOffset );
             }*/
          }
-         Devices::Cuda::freeFromDevice( kernel_this );
-         Devices::Cuda::freeFromDevice( kernel_inVector );
-         Devices::Cuda::freeFromDevice( kernel_outVector );
+         Cuda::freeFromDevice( kernel_this );
+         Cuda::freeFromDevice( kernel_inVector );
+         Cuda::freeFromDevice( kernel_outVector );
          TNL_CHECK_CUDA_DEVICE;
 #endif
       }
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h
index c9dee062c9feb64a4164618b84474b8d89a6dcab..c403fd4c84f09a59883f14f2fc5c23e79c1c65cb 100644
--- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h
+++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h
@@ -24,28 +24,6 @@ template< typename Real,
           int SliceSize >
 SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::SlicedEllpackSymmetric()
 {
-};
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          int SliceSize >
-String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getType()
-{
-   return String( "SlicedEllpackSymmetric< ") +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          Device :: getDeviceType() +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          int SliceSize >
-String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getTypeVirtual() const
-{
-   return this->getType();
 }
 
 template< typename Real,
@@ -80,7 +58,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL
 
    this->maxRowLength = max( rowLengths );
 
-   this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >();
+   this->slicePointers.template scan< Algorithms::ScanType::Exclusive >();
    this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
 }
 
@@ -534,7 +512,7 @@ const SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >* matrix,
                                                        OutVector* outVector,
                                                        int gridIdx )
 {
-   int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    matrix->spmvCuda( *inVector, *outVector, rowIdx );
 }
 #endif
@@ -806,7 +784,7 @@ __global__ void SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKerne
                                                                                    typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths,
                                                                                    int gridIdx )
 {
-   const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+   const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
    matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx );
 }
 #endif
@@ -865,21 +843,21 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
          typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix;
          typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector;
-         Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix );
+         Matrix* kernel_matrix = Cuda::passToDevice( matrix );
          const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize );
-         dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+         dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
          const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x );
-         const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+         const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
          for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
          {
             if( gridIdx == cudaGrids - 1 )
-               cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
             SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>>
                                                                              ( kernel_matrix,
                                                                                rowLengths,
                                                                                gridIdx );
          }
-         Devices::Cuda::freeFromDevice( kernel_matrix );
+         Cuda::freeFromDevice( kernel_matrix );
          TNL_CHECK_CUDA_DEVICE;
 #endif
       }
@@ -896,16 +874,16 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
          typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix;
          typedef typename Matrix::IndexType IndexType;
-         Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-         InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-         OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-         dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+         Matrix* kernel_this = Cuda::passToDevice( matrix );
+         InVector* kernel_inVector = Cuda::passToDevice( inVector );
+         OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+         dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
          const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-         const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+         const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
          for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
          {
             if( gridIdx == cudaGrids - 1 )
-               cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
             SlicedEllpackSymmetricVectorProductCudaKernel< Real, Index, SliceSize, InVector, OutVector >
                                                             <<< cudaGridSize, cudaBlockSize >>>
                                                               ( kernel_this,
@@ -913,9 +891,9 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda >
                                                                 kernel_outVector,
                                                                 gridIdx );
          }
-         Devices::Cuda::freeFromDevice( kernel_this );
-         Devices::Cuda::freeFromDevice( kernel_inVector );
-         Devices::Cuda::freeFromDevice( kernel_outVector );
+         Cuda::freeFromDevice( kernel_this );
+         Cuda::freeFromDevice( kernel_inVector );
+         Cuda::freeFromDevice( kernel_outVector );
          TNL_CHECK_CUDA_DEVICE;
 #endif
       }
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h
index 016edf6996e19fb5fa3cfedf65364ebc22fb53fd..45e8cdee77fbda670d2e3b23a3844ad0bb53d071 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/SlicedEllpack_impl.h
@@ -24,39 +24,21 @@ template< typename Real,
           int SliceSize >
 SlicedEllpack< Real, Device, Index, SliceSize >::SlicedEllpack()
 {
-};
+}
 
 template< typename Real,
           typename Device,
           typename Index,
           int SliceSize >
-String SlicedEllpack< Real, Device, Index, SliceSize >::getType()
+String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType()
 {
    return String( "Matrices::SlicedEllpack< ") +
-          String( TNL::getType< Real >() ) +
+          getType< Real >() +
           String( ", " ) +
-          Device :: getDeviceType() +
+          getType< Device >() +
           String( " >" );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          int SliceSize >
-String SlicedEllpack< Real, Device, Index, SliceSize >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          int SliceSize >
-String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType()
-{
-   return getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -97,7 +79,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C
 
    this->maxRowLength = max( rowLengths );
 
-   this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >();
+   this->slicePointers.template scan< Algorithms::ScanType::Exclusive >();
    this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
 }
 
@@ -129,7 +111,7 @@ template< typename Real,
 Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const
 {
     ConstMatrixRow matrixRow = getRow( row );
-    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
+    return matrixRow.getNonZeroElementsCount( getType< Device >() );
 }
 
 template< typename Real,
@@ -638,19 +620,14 @@ template< typename Real,
 SlicedEllpack< Real, Device, Index, SliceSize >&
 SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix )
 {
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value || std::is_same< Device, Devices::MIC >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value || std::is_same< Device2, Devices::MIC >::value,
-                  "unknown device" );
-
    this->setLike( matrix );
    this->slicePointers = matrix.slicePointers;
    this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths;
 
    // host -> cuda
    if( std::is_same< Device, Devices::Cuda >::value ) {
-      typename ValuesVector::HostType tmpValues;
-      typename ColumnIndexesVector::HostType tmpColumnIndexes;
+      typename ValuesVector::template Self< typename ValuesVector::ValueType, Devices::Sequential > tmpValues;
+      typename ColumnIndexesVector::template Self< typename ColumnIndexesVector::ValueType, Devices::Sequential > tmpColumnIndexes;
       tmpValues.setLike( matrix.values );
       tmpColumnIndexes.setLike( matrix.columnIndexes );
 
@@ -672,7 +649,7 @@ SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack<
    }
 
    // cuda -> host
-   if( std::is_same< Device, Devices::Host >::value ) {
+   else {
       ValuesVector tmpValues;
       ColumnIndexesVector tmpColumnIndexes;
       tmpValues.setLike( matrix.values );
@@ -693,10 +670,6 @@ SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack<
             }
       }
    }
-   
-   if( std::is_same< Device, Devices::MIC >::value ) {
-      throw Exceptions::NotImplementedError("Cross-device assignment for the SlicedEllpack format is not implemented for MIC.");
-   }
 
    return *this;
 }
@@ -746,7 +719,7 @@ template< typename Real,
           int SliceSize >
 void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str ) const
 {
-   if( std::is_same< Device, Devices::Host >::value ) {
+   if( ! std::is_same< Device, Devices::Cuda >::value ) {
       for( IndexType row = 0; row < this->getRows(); row++ )
       {
          str <<"Row: " << row << " -> ";
@@ -767,7 +740,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str )
       }
    }
    else {
-      HostType hostMatrix;
+      Self< Real, Devices::Sequential > hostMatrix;
       hostMatrix = *this;
       hostMatrix.print( str );
    }
@@ -800,12 +773,13 @@ __device__ void SlicedEllpack< Real, Device, Index, SliceSize >::computeMaximalR
 }
 #endif
 
-template<>
-class SlicedEllpackDeviceDependentCode< Devices::Host >
+// implementation for host types
+template< typename Device_ >
+class SlicedEllpackDeviceDependentCode
 {
    public:
 
-      typedef Devices::Host Device;
+      typedef Device_ Device;
 
       template< typename Real,
                 typename Index,
@@ -898,7 +872,7 @@ __global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( Sliced
                                                                           typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths,
                                                                           int gridIdx )
 {
-   const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+   const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
    matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx );
 }
 #endif
@@ -921,7 +895,7 @@ __global__ void SlicedEllpackVectorProductCudaKernel(
    Real multiplicator,
    const Index gridIdx )
 {
-   const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( rowIdx >= rows )
       return;
    const Index sliceIdx = rowIdx / SliceSize;
@@ -997,21 +971,21 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda >
 #ifdef HAVE_CUDA
          typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix;
          typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector;
-         Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix );
+         Matrix* kernel_matrix = Cuda::passToDevice( matrix );
          const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize );
-         dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+         dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
          const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x );
-         const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+         const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
          for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
          {
             if( gridIdx == cudaGrids - 1 )
-               cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+               cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
             SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>>
                                                                              ( kernel_matrix,
                                                                                rowLengths,
                                                                                gridIdx );
          }
-         Devices::Cuda::freeFromDevice( kernel_matrix );
+         Cuda::freeFromDevice( kernel_matrix );
          TNL_CHECK_CUDA_DEVICE;
 #endif
          return true;
@@ -1031,16 +1005,16 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda >
          #ifdef HAVE_CUDA
             typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix;
             typedef typename Matrix::IndexType IndexType;
-            //Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
-            //InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
-            //OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-            dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+            //Matrix* kernel_this = Cuda::passToDevice( matrix );
+            //InVector* kernel_inVector = Cuda::passToDevice( inVector );
+            //OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+            dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
             const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+            const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
             for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
             {
                if( gridIdx == cudaGrids - 1 )
-                  cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+                  cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
                SlicedEllpackVectorProductCudaKernel
                < Real, Index, SliceSize >
                 <<< cudaGridSize, cudaBlockSize >>>
@@ -1057,68 +1031,13 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda >
                   gridIdx );
                TNL_CHECK_CUDA_DEVICE;
             }
-            //Devices::Cuda::freeFromDevice( kernel_this );
-            //Devices::Cuda::freeFromDevice( kernel_inVector );
-            //Devices::Cuda::freeFromDevice( kernel_outVector );
+            //Cuda::freeFromDevice( kernel_this );
+            //Cuda::freeFromDevice( kernel_inVector );
+            //Cuda::freeFromDevice( kernel_outVector );
             TNL_CHECK_CUDA_DEVICE;
             cudaDeviceSynchronize();
          #endif
       }
-
-};
-
-template<>
-class SlicedEllpackDeviceDependentCode< Devices::MIC >
-{
-   public:
-
-      typedef Devices::MIC Device;
-
-      template< typename Real,
-                typename Index,
-                int SliceSize >
-      static void initRowTraverse( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
-                                   const Index row,
-                                   Index& rowBegin,
-                                   Index& rowEnd,
-                                   Index& step )
-      {
-         throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverse");
-      }
-
-      template< typename Real,
-                typename Index,
-                int SliceSize >
-      __cuda_callable__
-      static void initRowTraverseFast( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
-                                       const Index row,
-                                       Index& rowBegin,
-                                       Index& rowEnd,
-                                       Index& step )
-      {
-         throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverseFast");
-      }
-
-      template< typename Real,
-                typename Index,
-                int SliceSize >
-      static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
-                                                   typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths )
-      {
-         throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::computeMaximalRowLengthInSlices");
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector,
-                int SliceSize >
-      static void vectorProduct( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::vectorProduct");
-      }
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h
index ccc8930f9bb99825d89de1f7df6de5ce31fa427d..ff507c3268ce059108bd217e207c9c6487cb30c5 100644
--- a/src/TNL/Matrices/SparseOperations_impl.h
+++ b/src/TNL/Matrices/SparseOperations_impl.h
@@ -17,7 +17,7 @@
 #include <algorithm>
 
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Matrices {
@@ -130,8 +130,8 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
 #ifdef HAVE_CUDA
       dim3 blockSize( 256 );
       dim3 gridSize;
-      const IndexType desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-      gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( rows, blockSize.x ) );
+      const IndexType desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() );
+      gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( rows, blockSize.x ) );
 
       typename Matrix1::CompressedRowLengthsVector rowLengths;
       rowLengths.setSize( rows );
@@ -140,7 +140,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
       const Pointers::DevicePointer< const Matrix2 > Bpointer( B );
 
       // set row lengths
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       SparseMatrixSetRowLengthsVectorKernel<<< gridSize, blockSize >>>(
             rowLengths.getData(),
             &Bpointer.template getData< TNL::Devices::Cuda >(),
@@ -150,7 +150,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
       Apointer->setCompressedRowLengths( rowLengths );
 
       // copy rows
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       SparseMatrixCopyKernel<<< gridSize, blockSize >>>(
             &Apointer.template modifyData< TNL::Devices::Cuda >(),
             &Bpointer.template getData< TNL::Devices::Cuda >(),
@@ -170,7 +170,8 @@ typename std::enable_if< ! std::is_same< typename Matrix1::DeviceType, typename
                            std::is_same< typename Matrix2::DeviceType, Devices::Host >::value >::type
 copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
 {
-   typename Matrix2::CudaType B_tmp;
+   using CudaMatrix2 = typename Matrix2::template Self< typename Matrix2::RealType, Devices::Cuda >;
+   CudaMatrix2 B_tmp;
    B_tmp = B;
    copySparseMatrix_impl( A, B_tmp );
 }
@@ -182,7 +183,8 @@ typename std::enable_if< ! std::is_same< typename Matrix1::DeviceType, typename
                            std::is_same< typename Matrix2::DeviceType, Devices::Cuda >::value >::type
 copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
 {
-   typename Matrix1::CudaType A_tmp;
+   using CudaMatrix1 = typename Matrix1::template Self< typename Matrix1::RealType, Devices::Cuda >;
+   CudaMatrix1 A_tmp;
    copySparseMatrix_impl( A_tmp, B );
    A = A_tmp;
 }
@@ -353,11 +355,11 @@ reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm )
       dest[ i ] = src[ perm[ i ] ];
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(),
-                                    kernel,
-                                    src.getData(),
-                                    dest.getData(),
-                                    perm.getData() );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(),
+                                                kernel,
+                                                src.getData(),
+                                                dest.getData(),
+                                                perm.getData() );
 }
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/SparseRow.h
index c7ebd07039061fdb775a583ac065e0a71c5f4869..f66cd2ceaf1c6f0cd882bb962a78c6649816aa75 100644
--- a/src/TNL/Matrices/SparseRow.h
+++ b/src/TNL/Matrices/SparseRow.h
@@ -14,7 +14,7 @@
 #include <type_traits>
 #include <ostream>
 
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
 namespace Matrices {
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 7f58bd9c492fa59c454b2226f301c83b89e74dbf..3f57fe1c3e6de1cf0e608cd68b5846eb711e321d 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -40,16 +40,15 @@ public:
    typedef Index IndexType;
    typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Tridiagonal< Real, Devices::Host, Index > HostType;
-   typedef Tridiagonal< Real, Devices::Cuda, Index > CudaType;
    typedef Matrix< Real, Device, Index > BaseType;
    typedef TridiagonalRow< Real, Index > MatrixRow;
 
-   Tridiagonal();
-
-   static String getType();
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Tridiagonal< _Real, _Device, _Index >;
 
-   String getTypeVirtual() const;
+   Tridiagonal();
 
    static String getSerializationType();
 
diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h
index 9a2d5e4a87c4b505f186bb59a95280eecd5f1bf2..62575f1776144e374b560a65e213248a1177de80 100644
--- a/src/TNL/Matrices/Tridiagonal_impl.h
+++ b/src/TNL/Matrices/Tridiagonal_impl.h
@@ -27,31 +27,15 @@ Tridiagonal< Real, Device, Index >::Tridiagonal()
 {
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getType()
-{
-   return String( "Matrices::Tridiagonal< " ) +
-          String( TNL::getType< RealType >() ) + ", " +
-          String( Device :: getDeviceType() ) + ", " +
-          String( TNL::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
 String Tridiagonal< Real, Device, Index >::getSerializationType()
 {
-   return getType();
+   return String( "Matrices::Tridiagonal< " ) +
+          getType< RealType >() + ", " +
+          getType< Device >() + ", " +
+          getType< IndexType >() + " >";
 }
 
 template< typename Real,
@@ -468,7 +452,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De
                                                              const Real matrixMultiplicator,
                                                              const Index gridIdx )
 {
-   const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( rowIdx < inMatrix->getRows() )
    {
       if( rowIdx > 0 )
@@ -510,24 +494,24 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      Tridiagonal* kernel_this = Devices::Cuda::passToDevice( *this );
+      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
       typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
-      InMatrixType* kernel_inMatrix = Devices::Cuda::passToDevice( matrix );
-      dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
       const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
       for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
       {
          if( gridIdx == cudaGrids - 1 )
-            cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
          TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
                                                     ( kernel_inMatrix,
                                                       kernel_this,
                                                       matrixMultiplicator,
                                                       gridIdx );
       }
-      Devices::Cuda::freeFromDevice( kernel_this );
-      Devices::Cuda::freeFromDevice( kernel_inMatrix );
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
       TNL_CHECK_CUDA_DEVICE;
 #endif
    }
diff --git a/src/TNL/Meshes/DefaultConfig.h b/src/TNL/Meshes/DefaultConfig.h
index 5e8a7cbef85fc2a03681553f7348af5c95f45dbb..36635647477ff7b3d70a3dd0d92b836cea6da12b 100644
--- a/src/TNL/Meshes/DefaultConfig.h
+++ b/src/TNL/Meshes/DefaultConfig.h
@@ -17,7 +17,7 @@
 #pragma once
 
 #include <TNL/String.h>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 #include <TNL/Meshes/Topologies/SubentityVertexMap.h>
 
 namespace TNL {
@@ -46,17 +46,6 @@ struct DefaultConfig
    static constexpr int worldDimension = WorldDimension;
    static constexpr int meshDimension = Cell::dimension;
 
-   static String getType()
-   {
-      return String( "Meshes::DefaultConfig< " ) +
-             Cell::getType() + ", " +
-             convertToString( WorldDimension ) + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< GlobalIndex >() + ", " +
-             TNL::getType< LocalIndex >() + ", " +
-             TNL::getType< Id >() + " >";
-   };
-
    /****
     * Storage of mesh entities.
     */
diff --git a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h
index d0461ddedbe1b0aa1f3ab6d3cfa8a5e57c3908a4..6030b976f038ab290ada814575db1bfb444ce694 100644
--- a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h
+++ b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Containers/StaticVector.h>
 #include <TNL/Communicators/MPIPrint.h>
 
@@ -67,7 +67,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 1, RealType, Device,
                   meshFunctionData[ entity.getIndex() ] = buffer[ j ];
             }
          };
-         ParallelFor< Device >::exec( 0, sizex, kernel );
+         Algorithms::ParallelFor< Device >::exec( 0, sizex, kernel );
       };
 };
 
@@ -115,7 +115,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 2, RealType, Device,
                   meshFunctionData[ entity.getIndex() ] = buffer[ j * sizex + i ];
             }
          };
-         ParallelFor2D< Device >::exec( 0, 0, sizex, sizey, kernel );
+         Algorithms::ParallelFor2D< Device >::exec( 0, 0, sizex, sizey, kernel );
       };
 };
 
@@ -164,7 +164,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 3, RealType, Device,
                   meshFunctionData[ entity.getIndex() ] = buffer[ k * sizex * sizey + j * sizex + i ];
             }
          };
-         ParallelFor3D< Device >::exec( 0, 0, 0, sizex, sizey, sizez, kernel );
+         Algorithms::ParallelFor3D< Device >::exec( 0, 0, 0, sizex, sizey, sizez, kernel );
       };
 };
 
diff --git a/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h
index 20efa02599ef6808ad1516f8409bab08648aca47..ec30a4f470f96d855edccbe3d67e4a6542bc657a 100644
--- a/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h
+++ b/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Meshes {
@@ -45,7 +45,7 @@ class CopyEntitiesHelper<MeshFunctionType, 1>
             fromEntity.refresh();
             toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()];
         };
-        ParallelFor< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0, (Index)size.x(), kernel );
+        Algorithms::ParallelFor< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0, (Index)size.x(), kernel );
 
     }
 
@@ -79,7 +79,7 @@ class CopyEntitiesHelper<MeshFunctionType,2>
             fromEntity.refresh();
             toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()];
         };
-        ParallelFor2D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)size.x(), (Index)size.y(), kernel );
+        Algorithms::ParallelFor2D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)size.x(), (Index)size.y(), kernel );
     }
 
 };
@@ -113,7 +113,7 @@ class CopyEntitiesHelper<MeshFunctionType,3>
             fromEntity.refresh();
             toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()];
         };
-        ParallelFor3D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)0,(Index)size.x(),(Index)size.y(), (Index)size.z(), kernel );
+        Algorithms::ParallelFor3D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)0,(Index)size.x(),(Index)size.y(), (Index)size.z(), kernel );
     }
 };
 
diff --git a/src/TNL/Meshes/Geometry/getEntityCenter.h b/src/TNL/Meshes/Geometry/getEntityCenter.h
index 59cd950ca180cdbf8095382536544e9fe0ffde51..a37c27acf00523341382a7f44c5ef74adfd14d45 100644
--- a/src/TNL/Meshes/Geometry/getEntityCenter.h
+++ b/src/TNL/Meshes/Geometry/getEntityCenter.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Meshes/GridEntity.h>
 #include <TNL/Meshes/Mesh.h>
 #include <TNL/Meshes/MeshEntity.h>
diff --git a/src/TNL/Meshes/Geometry/getEntityMeasure.h b/src/TNL/Meshes/Geometry/getEntityMeasure.h
index 7402e4f6db3226b534caf6753958fc10e4efdb1a..a3381ed96b1e72ddc2f0ccf25bbbdccd7e71739a 100644
--- a/src/TNL/Meshes/Geometry/getEntityMeasure.h
+++ b/src/TNL/Meshes/Geometry/getEntityMeasure.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Meshes/GridEntity.h>
 #include <TNL/Meshes/Mesh.h>
 #include <TNL/Meshes/MeshEntity.h>
diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h
index 53b748c4e6a7893d8fb645ffdcc271c80c8c5b58..81811fe9005c44bb1239f06fe423f4d566c7c77c 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D.h
@@ -34,8 +34,6 @@ class Grid< 1, Real, Device, Index > : public Object
    typedef Index GlobalIndexType;
    typedef Containers::StaticVector< 1, Real > PointType;
    typedef Containers::StaticVector< 1, Index > CoordinatesType;
-   typedef Grid< 1, Real, Devices::Host, Index > HostType;
-   typedef Grid< 1, Real, Devices::Cuda, Index > CudaType;
 
    typedef DistributedMeshes::DistributedMesh <Grid> DistributedMeshType;
 
@@ -65,16 +63,6 @@ class Grid< 1, Real, Device, Index > : public Object
    // empty destructor is needed only to avoid crappy nvcc warnings
    ~Grid() {}
 
-   /**
-    * \brief Returns type of grid Real (value), Device type and the type of Index.
-    */
-   static String getType();
-
-   /**
-    * \brief Returns type of grid Real (value), Device type and the type of Index.
-    */
-   String getTypeVirtual() const;
-
    /**
     * \brief Returns (host) type of grid Real (value), Device type and the type of Index.
     */
diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
index a747544df981bec045d710cd18220f96832487d3..279ec9810184ea097212e45984183ad1ddc97e70 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
@@ -44,32 +44,16 @@ Grid< 1, Real, Device, Index >::Grid( const Index xSize )
    this->setDimensions( xSize );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index  >
-String Grid< 1, Real, Device, Index >::getType()
-{
-   return String( "Meshes::Grid< " ) +
-          convertToString( getMeshDimension() ) + ", " +
-          String( TNL::getType< RealType >() ) + ", " +
-          String( Device::getDeviceType() ) + ", " +
-          String( TNL::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Grid< 1, Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
 String Grid< 1, Real, Device, Index >::getSerializationType()
 {
-   return HostType::getType();
+   return String( "Meshes::Grid< " ) +
+          convertToString( getMeshDimension() ) + ", " +
+          getType< RealType >() + ", " +
+          getType< Devices::Host >() + ", " +
+          getType< IndexType >() + " >";
 };
 
 template< typename Real,
diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h
index 61f3c11c0e0684c892a02971bfc5a2d2df67979c..b24be9ba29503f4f6b80f2c3ad0ada1097d89844 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D.h
@@ -34,8 +34,6 @@ class Grid< 2, Real, Device, Index > : public Object
    typedef Index GlobalIndexType;
    typedef Containers::StaticVector< 2, Real > PointType;
    typedef Containers::StaticVector< 2, Index > CoordinatesType;
-   typedef Grid< 2, Real, Devices::Host, Index > HostType;
-   typedef Grid< 2, Real, Devices::Cuda, Index > CudaType;
 
    typedef DistributedMeshes::DistributedMesh <Grid> DistributedMeshType;
  
@@ -57,21 +55,11 @@ class Grid< 2, Real, Device, Index > : public Object
     */
    Grid();
 
-   /**
-    * \brief See Grid1D::getType().
-    */
    Grid( const Index xSize, const Index ySize );
 
    // empty destructor is needed only to avoid crappy nvcc warnings
    ~Grid() {}
 
-   static String getType();
-
-   /**
-    * \brief See Grid1D::getTypeVirtual().
-    */
-   String getTypeVirtual() const;
-
    /**
     * \brief See Grid1D::getSerializationType().
     */
diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
index 7b673e0a819d85c21426c2e3a7a09f4dcceb4f48..259181688c0566abdd3025bac1cbda1429a60d10 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
@@ -54,29 +54,13 @@ Grid< 2, Real, Device, Index >::Grid( const Index xSize, const Index ySize )
 template< typename Real,
           typename Device,
           typename Index >
-String Grid< 2, Real, Device, Index > :: getType()
+String Grid< 2, Real, Device, Index > :: getSerializationType()
 {
    return String( "Meshes::Grid< " ) +
           convertToString( getMeshDimension() ) + ", " +
-          String( TNL::getType< RealType >() ) + ", " +
-          String( Device :: getDeviceType() ) + ", " +
-          String( TNL::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-           typename Device,
-           typename Index >
-String Grid< 2, Real, Device, Index > :: getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Grid< 2, Real, Device, Index > :: getSerializationType()
-{
-   return HostType::getType();
+          getType< RealType >() + ", " +
+          getType< Devices::Host >() + ", " +
+          getType< IndexType >() + " >";
 };
 
 template< typename Real,
diff --git a/src/TNL/Meshes/GridDetails/Grid3D.h b/src/TNL/Meshes/GridDetails/Grid3D.h
index 67c752cb13700628abcac76de2f066473baf26a7..881fb0074bf642ca6fdcd1300df849c93d205792 100644
--- a/src/TNL/Meshes/GridDetails/Grid3D.h
+++ b/src/TNL/Meshes/GridDetails/Grid3D.h
@@ -33,8 +33,6 @@ class Grid< 3, Real, Device, Index > : public Object
    typedef Index GlobalIndexType;
    typedef Containers::StaticVector< 3, Real > PointType;
    typedef Containers::StaticVector< 3, Index > CoordinatesType;
-   typedef Grid< 3, Real, Devices::Host, Index > HostType;
-   typedef Grid< 3, Real, Devices::Cuda, Index > CudaType;
 
    typedef DistributedMeshes::DistributedMesh <Grid> DistributedMeshType;
  
@@ -62,16 +60,6 @@ class Grid< 3, Real, Device, Index > : public Object
    // empty destructor is needed only to avoid crappy nvcc warnings
    ~Grid() {}
 
-   /**
-    * \brief See Grid1D::getType().
-    */
-   static String getType();
-
-   /**
-    * \brief See Grid1D::getTypeVirtual().
-    */
-   String getTypeVirtual() const;
-
    /**
     * \brief See Grid1D::getSerializationType().
     */
diff --git a/src/TNL/Meshes/GridDetails/Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Grid3D_impl.h
index dbd5fcf47e71fd6c2dbb0ba271eb1656773a9a80..f4707a8ce34654544f9d7455122ebc3a340d690b 100644
--- a/src/TNL/Meshes/GridDetails/Grid3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid3D_impl.h
@@ -68,29 +68,13 @@ Grid< 3, Real, Device, Index >::Grid( const Index xSize, const Index ySize, cons
 template< typename Real,
           typename Device,
           typename Index >
-String Grid< 3, Real, Device, Index > :: getType()
+String Grid< 3, Real, Device, Index > :: getSerializationType()
 {
    return String( "Meshes::Grid< " ) +
           convertToString( getMeshDimension() ) + ", " +
-          String( TNL::getType< RealType >() ) + ", " +
-          String( Device :: getDeviceType() ) + ", " +
-          String( TNL::getType< IndexType >() ) + " >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Grid< 3, Real, Device, Index > :: getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Grid< 3, Real, Device, Index > :: getSerializationType()
-{
-   return HostType::getType();
+          getType< RealType >() + ", " +
+          getType< Devices::Host >() + ", " +
+          getType< IndexType >() + " >";
 };
 
 template< typename Real,
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser.h b/src/TNL/Meshes/GridDetails/GridTraverser.h
index fb6b34da12fb750c0ad74cc3ba05b086727adf01..e8702153fb03343f85badc160676b1c788eaa948 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser.h
+++ b/src/TNL/Meshes/GridDetails/GridTraverser.h
@@ -12,7 +12,6 @@
 
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/CudaStreamPool.h>
 
 namespace TNL {
 namespace Meshes {
@@ -89,38 +88,6 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Cuda, Index > >
          const int& stream = 0 );
 };
 
-/****
- * 1D grid, Devices::MIC
- */
-template< typename Real,
-          typename Index >
-class GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >
-{
-   public:
-      
-      typedef Meshes::Grid< 1, Real, Devices::MIC, Index > GridType;
-      typedef Pointers::SharedPointer<  GridType > GridPointer;
-      typedef Real RealType;
-      typedef Devices::MIC DeviceType;
-      typedef Index IndexType;
-      typedef typename GridType::CoordinatesType CoordinatesType;
- 
-      template<
-         typename GridEntity,
-         typename EntitiesProcessor,
-         typename UserData,
-         bool processOnlyBoundaryEntities  >
-      static void
-      processEntities(
-         const GridPointer& gridPointer,
-         const CoordinatesType& begin,
-         const CoordinatesType& end,
-         UserData& userData,
-         GridTraverserMode mode = synchronousMode,
-         const int& stream = 0 );
-};
-
-
 
 /****
  * 2D grid, Devices::Host
@@ -202,45 +169,6 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > >
          const GridEntityParameters&... gridEntityParameters );
 };
 
-/****
- * 2D grid, Devices::MIC
- */
-template< typename Real,
-          typename Index >
-class GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >
-{
-   public:
-      
-      typedef Meshes::Grid< 2, Real, Devices::MIC, Index > GridType;
-      typedef Pointers::SharedPointer<  GridType > GridPointer;
-      typedef Real RealType;
-      typedef Devices::MIC DeviceType;
-      typedef Index IndexType;
-      typedef typename GridType::CoordinatesType CoordinatesType;
- 
-      template<
-         typename GridEntity,
-         typename EntitiesProcessor,
-         typename UserData,
-         bool processOnlyBoundaryEntities,
-         int XOrthogonalBoundary = 1,
-         int YOrthogonalBoundary = 1,
-         typename... GridEntityParameters >
-      static void
-      processEntities(
-         const GridPointer& gridPointer,
-         const CoordinatesType& begin,
-         const CoordinatesType& end,
-         UserData& userData,
-         // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
-         //GridTraverserMode mode = synchronousMode,
-         GridTraverserMode mode,
-         // const int& stream = 0,
-         const int& stream,
-         // gridEntityParameters are passed to GridEntity's constructor
-         // (i.e. orientation and basis for faces)
-         const GridEntityParameters&... gridEntityParameters );
-};
 
 /****
  * 3D grid, Devices::Host
@@ -324,51 +252,9 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Cuda, Index > >
          const GridEntityParameters&... gridEntityParameters );
 };
 
-/****
- * 3D grid, Devices::Cuda
- */
-template< typename Real,
-          typename Index >
-class GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >
-{
-   public:
-      
-      typedef Meshes::Grid< 3, Real, Devices::MIC, Index > GridType;
-      typedef Pointers::SharedPointer<  GridType > GridPointer;
-      typedef Real RealType;
-      typedef Devices::MIC DeviceType;
-      typedef Index IndexType;
-      typedef typename GridType::CoordinatesType CoordinatesType;
- 
-      template<
-         typename GridEntity,
-         typename EntitiesProcessor,
-         typename UserData,
-         bool processOnlyBoundaryEntities,
-         int XOrthogonalBoundary = 1,
-         int YOrthogonalBoundary = 1,
-         int ZOrthogonalBoundary = 1,
-         typename... GridEntityParameters >
-      static void
-      processEntities(
-         const GridPointer& gridPointer,
-         const CoordinatesType& begin,
-         const CoordinatesType& end,
-         UserData& userData,
-         // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
-         //GridTraverserMode mode = synchronousMode,
-         GridTraverserMode mode,
-         // const int& stream = 0,
-         const int& stream,
-         // gridEntityParameters are passed to GridEntity's constructor
-         // (i.e. orientation and basis for faces and edges)
-         const GridEntityParameters&... gridEntityParameters );
-};
-
 } // namespace Meshes
 } // namespace TNL
 
 #include <TNL/Meshes/GridDetails/GridTraverser_1D.hpp>
 #include <TNL/Meshes/GridDetails/GridTraverser_2D.hpp>
 #include <TNL/Meshes/GridDetails/GridTraverser_3D.hpp>
-
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp
index 59989bb2a14a85443c2f9616c583ab945b727116..c1aab9660d50ee8fe6917ae08c6bb869e333c056 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp
+++ b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp
@@ -14,10 +14,9 @@
 
 #pragma once
 
-#include <TNL/Devices/MIC.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/CudaStreamPool.h>
+#include <TNL/Cuda/StreamPool.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Meshes/GridDetails/GridTraverser.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -121,7 +120,7 @@ GridTraverser1D(
    typedef Meshes::Grid< 1, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
  
-   coordinates.x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   coordinates.x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( coordinates <= end )
    {   
       GridEntity entity( *grid, coordinates );
@@ -183,10 +182,10 @@ processEntities(
    const int& stream )
 {
 #ifdef HAVE_CUDA
-   auto& pool = CudaStreamPool::getInstance();
+   auto& pool = Cuda::StreamPool::getInstance();
    const cudaStream_t& s = pool.getStream( stream );
 
-   Devices::Cuda::synchronizeDevice();
+   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
    if( processOnlyBoundaryEntities )
    {
       dim3 cudaBlockSize( 2 );
@@ -201,7 +200,7 @@ processEntities(
    else
    {
       dim3 blockSize( 256 ), blocksCount, gridsCount;
-      Devices::Cuda::setupThreads(
+      Cuda::setupThreads(
          blockSize,
          blocksCount,
          gridsCount,
@@ -210,7 +209,7 @@ processEntities(
       for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
       {
          dim3 gridSize;
-         Devices::Cuda::setupGrid(
+         Cuda::setupGrid(
             blocksCount,
             gridsCount,
             gridIdx,
@@ -226,8 +225,8 @@ processEntities(
 
       /*dim3 cudaBlockSize( 256 );
       dim3 cudaBlocks;
-      cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
-      const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
+      cudaBlocks.x = Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
+      const IndexType cudaXGrids = Cuda::getNumberOfGrids( cudaBlocks.x );
 
       for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
          GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
@@ -255,69 +254,5 @@ processEntities(
 #endif
 }
 
-/****
- * 1D traverser, MIC
- */
-
-template< typename Real,
-          typename Index >
-   template<
-      typename GridEntity,
-      typename EntitiesProcessor,
-      typename UserData,
-      bool processOnlyBoundaryEntities >
-void
-GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >::
-processEntities(
-   const GridPointer& gridPointer,
-   const CoordinatesType& begin,
-   const CoordinatesType& end,
-   UserData& userData,
-   GridTraverserMode mode,
-   const int& stream )
-{
-    throw Exceptions::NotImplementedError("Not Implemented yet Grid Traverser <1, Real, Device::MIC>");
-/*
-   auto& pool = CudaStreamPool::getInstance();
-   const cudaStream_t& s = pool.getStream( stream );
-
-   Devices::Cuda::synchronizeDevice();
-   if( processOnlyBoundaryEntities )
-   {
-      dim3 cudaBlockSize( 2 );
-      dim3 cudaBlocks( 1 );
-      GridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
-            <<< cudaBlocks, cudaBlockSize, 0, s >>>
-            ( &gridPointer.template getData< Devices::Cuda >(),
-              userData,
-              begin,
-              end );
-   }
-   else
-   {
-      dim3 cudaBlockSize( 256 );
-      dim3 cudaBlocks;
-      cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
-      const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
-
-      for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
-         GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
-            <<< cudaBlocks, cudaBlockSize, 0, s >>>
-            ( &gridPointer.template getData< Devices::Cuda >(),
-              userData,
-              begin,
-              end,
-              gridXIdx );
-   }
-
-   // only launches into the stream 0 are synchronized
-   if( stream == 0 )
-   {
-      cudaStreamSynchronize( s );
-      TNL_CHECK_CUDA_DEVICE;
-   }
-*/
-}
-
-   } // namespace Meshes
+} // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp
index 50b30c0190bdda8c6c266385ecd785884f3282ac..721ec96d2331c103cb0179e5bd77b224b700c28f 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp
+++ b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp
@@ -10,10 +10,9 @@
 
 #pragma once
 
-#include <TNL/Devices/MIC.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/CudaStreamPool.h>
+#include <TNL/Cuda/StreamPool.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Meshes/GridDetails/GridTraverser.h>
 
@@ -149,8 +148,8 @@ GridTraverser2D(
    typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
-   coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
+   coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx );
    
    if( coordinates <= end )
    {
@@ -187,7 +186,7 @@ GridTraverser2DBoundaryAlongX(
    typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx );
    coordinates.y() = fixedY;
    
    if( coordinates.x() <= endX )
@@ -223,7 +222,7 @@ GridTraverser2DBoundaryAlongY(
    typename GridType::CoordinatesType coordinates;
 
    coordinates.x() = fixedX;
-   coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.y() = beginY + Cuda::getGlobalThreadIdx_x( gridIdx );
    
    if( coordinates.y() <= endY )
    {
@@ -292,10 +291,10 @@ GridTraverser2DBoundary(
    
    
    /*const Index aux = max( entitiesAlongX, entitiesAlongY );
-   const Index& warpSize = Devices::Cuda::getWarpSize();
+   const Index& warpSize = Cuda::getWarpSize();
    const Index threadsPerAxis = warpSize * ( aux / warpSize + ( aux % warpSize != 0 ) );
    
-   Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   Index threadId = Cuda::getGlobalThreadIdx_x( gridIdx );
    GridEntity entity( *grid, 
          CoordinatesType( 0, 0 ),
          gridEntityParameters... );
@@ -415,19 +414,19 @@ processEntities(
       dim3 cudaBlockSize( 256 );
       dim3 cudaBlocksCountAlongX, cudaGridsCountAlongX,
            cudaBlocksCountAlongY, cudaGridsCountAlongY;
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 );
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 );
             
-      auto& pool = CudaStreamPool::getInstance();
-      Devices::Cuda::synchronizeDevice();
-      
+      auto& pool = Cuda::StreamPool::getInstance();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
+
       const cudaStream_t& s1 = pool.getStream( stream );
       const cudaStream_t& s2 = pool.getStream( stream + 1 );
       dim3 gridIdx, cudaGridSize;
       for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ )
       {
-         Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize );
-         //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX );
+         Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize );
+         //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX );
          GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s1 >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
@@ -451,7 +450,7 @@ processEntities(
       const cudaStream_t& s4 = pool.getStream( stream + 3 );
       for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ )
       {
-         Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize );
+         Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize );
          GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s3 >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
@@ -483,15 +482,15 @@ processEntities(
       const IndexType maxFaceSize = max( entitiesAlongX, entitiesAlongY );
       const IndexType blocksPerFace = maxFaceSize / cudaBlockSize.x + ( maxFaceSize % cudaBlockSize.x != 0 );
       IndexType cudaThreadsCount = 4 * cudaBlockSize.x * blocksPerFace;
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
       //std::cerr << "blocksPerFace = " << blocksPerFace << "Threads count = " << cudaThreadsCount 
       //          << "cudaBlockCount = " << cudaBlocksCount.x << std::endl;      
       dim3 gridIdx, cudaGridSize;
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ )
       {
-         Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
-         //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX );
+         Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+         //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX );
          GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
@@ -512,20 +511,20 @@ processEntities(
    {
       dim3 cudaBlockSize( 16, 16 );
       dim3 cudaBlocksCount, cudaGridsCount;
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
-                                   end.x() - begin.x() + 1,
-                                   end.y() - begin.y() + 1 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
+                          end.x() - begin.x() + 1,
+                          end.y() - begin.y() + 1 );
       
-      auto& pool = CudaStreamPool::getInstance();
+      auto& pool = Cuda::StreamPool::getInstance();
       const cudaStream_t& s = pool.getStream( stream );
 
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       dim3 gridIdx, cudaGridSize;
       for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ )
          for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ )
          {
-            Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
-	    //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
+            Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+	    //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
             GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
@@ -553,104 +552,5 @@ processEntities(
 #endif
 }
 
-
-/****
- * 2D traverser, MIC
- */
-template< typename Real,
-          typename Index >
-   template<
-      typename GridEntity,
-      typename EntitiesProcessor,
-      typename UserData,
-      bool processOnlyBoundaryEntities,
-         int XOrthogonalBoundary,
-         int YOrthogonalBoundary,
-      typename... GridEntityParameters >
-void
-GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >::
-processEntities(
-   const GridPointer& gridPointer,
-   const CoordinatesType& begin,
-   const CoordinatesType& end,
-   UserData& userData,
-   GridTraverserMode mode,
-   const int& stream,
-   const GridEntityParameters&... gridEntityParameters )
-{
-        
-    
-#ifdef HAVE_MIC   
-   Devices::MIC::synchronizeDevice();
-
-    //TOHLE JE PRUSER -- nemim poslat vypustku -- 
-    //GridEntity entity( gridPointer.template getData< Devices::MIC >(), begin, gridEntityParameters... );
-
-
-    Devices::MICHider<const GridType> hMicGrid;
-    hMicGrid.pointer=& gridPointer.template getData< Devices::MIC >();
-    Devices::MICHider<UserData> hMicUserData;
-    hMicUserData.pointer=& userDataPointer.template modifyData<Devices::MIC>();
-    TNLMICSTRUCT(begin, const CoordinatesType);
-    TNLMICSTRUCT(end, const CoordinatesType);
-
-    #pragma offload target(mic) in(sbegin,send,hMicUserData,hMicGrid)  
-    {
-        
-        #pragma omp parallel firstprivate( sbegin, send )
-        {     
-            TNLMICSTRUCTUSE(begin, const CoordinatesType);
-            TNLMICSTRUCTUSE(end, const CoordinatesType);    
-            GridEntity entity( *(hMicGrid.pointer), *(kernelbegin) );
-          
-            if( processOnlyBoundaryEntities )
-             {      
-               if( YOrthogonalBoundary )
-                  #pragma omp for
-                  for( auto k = kernelbegin->x();
-                       k <= kernelend->x();
-                       k ++ )
-                  {
-                     entity.getCoordinates().x() = k;
-                     entity.getCoordinates().y() = kernelbegin->y();
-                     entity.refresh();
-                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
-                     entity.getCoordinates().y() = kernelend->y();
-                     entity.refresh();
-                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
-                  }
-               if( XOrthogonalBoundary )
-                  #pragma omp for
-                  for( auto k = kernelbegin->y();
-                       k <= kernelend->y();
-                       k ++ )
-                  {
-                     entity.getCoordinates().y() = k;
-                     entity.getCoordinates().x() = kernelbegin->x();
-                     entity.refresh();
-                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
-                     entity.getCoordinates().x() = kernelend->x();
-                     entity.refresh();
-                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
-                  }
-             }
-            else
-            {
-                  #pragma omp for
-                  for( IndexType y = kernelbegin->y(); y <= kernelend->y(); y ++ )
-                     for( IndexType x = kernelbegin->x(); x <= kernelend->x(); x ++ )
-                     {
-                        // std::cerr << x << "   " <<y << std::endl;
-                        entity.getCoordinates().x() = x;
-                        entity.getCoordinates().y() = y;
-                        entity.refresh();
-                        EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
-                     }      
-             }
-        }
-    }
-      
-#endif
-}
-   } // namespace Meshes
+} // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp
index 5a3cd568f93bcb20f40682e55959eadf50b5c67f..a9aad8c9533dfecdc6e5410be51705d24438725c 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp
+++ b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp
@@ -10,10 +10,9 @@
 
 #pragma once
 
-#include <TNL/Devices/MIC.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/CudaStreamPool.h>
+#include <TNL/Cuda/StreamPool.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Meshes/GridDetails/GridTraverser.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -178,9 +177,9 @@ GridTraverser3D(
    typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
-   coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
-   coordinates.z() = begin.z() + Devices::Cuda::getGlobalThreadIdx_z( gridIdx );
+   coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx );
+   coordinates.z() = begin.z() + Cuda::getGlobalThreadIdx_z( gridIdx );
 
    if( coordinates <= end )
    {
@@ -218,8 +217,8 @@ GridTraverser3DBoundaryAlongXY(
    typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
-   coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
+   coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.y() = beginY + Cuda::getGlobalThreadIdx_y( gridIdx );
    coordinates.z() = fixedZ;  
    
    if( coordinates.x() <= endX && coordinates.y() <= endY )
@@ -255,9 +254,9 @@ GridTraverser3DBoundaryAlongXZ(
    typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType;
    typename GridType::CoordinatesType coordinates;
 
-   coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx );
    coordinates.y() = fixedY;
-   coordinates.z() = beginZ + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
+   coordinates.z() = beginZ + Cuda::getGlobalThreadIdx_y( gridIdx );
    
    if( coordinates.x() <= endX && coordinates.z() <= endZ )
    {
@@ -293,8 +292,8 @@ GridTraverser3DBoundaryAlongYZ(
    typename GridType::CoordinatesType coordinates;
 
    coordinates.x() = fixedX;
-   coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
-   coordinates.z() = beginZ + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
+   coordinates.y() = beginY + Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.z() = beginZ + Cuda::getGlobalThreadIdx_y( gridIdx );
    
    if( coordinates.y() <= endY && coordinates.z() <= endZ )
    {
@@ -342,13 +341,13 @@ processEntities(
       dim3 cudaBlocksCountAlongXY, cudaBlocksCountAlongXZ, cudaBlocksCountAlongYZ,
            cudaGridsCountAlongXY, cudaGridsCountAlongXZ, cudaGridsCountAlongYZ;
       
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXY, cudaGridsCountAlongXY, entitiesAlongX, entitiesAlongY );
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, entitiesAlongX, entitiesAlongZ - 2 );
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXY, cudaGridsCountAlongXY, entitiesAlongX, entitiesAlongY );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, entitiesAlongX, entitiesAlongZ - 2 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 );
+
+      auto& pool = Cuda::StreamPool::getInstance();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
 
-      auto& pool = CudaStreamPool::getInstance();
-      Devices::Cuda::synchronizeDevice();
-      
       const cudaStream_t& s1 = pool.getStream( stream );
       const cudaStream_t& s2 = pool.getStream( stream + 1 );
       const cudaStream_t& s3 = pool.getStream( stream + 2 );
@@ -360,7 +359,7 @@ processEntities(
       for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongXY.y; gridIdx.y++ )
          for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongXY.x; gridIdx.x++ )
          {
-            Devices::Cuda::setupGrid( cudaBlocksCountAlongXY, cudaGridsCountAlongXY, gridIdx, gridSize );
+            Cuda::setupGrid( cudaBlocksCountAlongXY, cudaGridsCountAlongXY, gridIdx, gridSize );
             GridTraverser3DBoundaryAlongXY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongXY, cudaBlockSize, 0 , s1 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
@@ -387,7 +386,7 @@ processEntities(
       for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongXZ.y; gridIdx.y++ )
          for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongXZ.x; gridIdx.x++ )
          {
-            Devices::Cuda::setupGrid( cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, gridIdx, gridSize );
+            Cuda::setupGrid( cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, gridIdx, gridSize );
             GridTraverser3DBoundaryAlongXZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongXZ, cudaBlockSize, 0, s3 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
@@ -414,7 +413,7 @@ processEntities(
       for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongYZ.y; gridIdx.y++ )
          for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongYZ.x; gridIdx.x++ )
          {
-            Devices::Cuda::setupGrid( cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, gridIdx, gridSize );
+            Cuda::setupGrid( cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, gridIdx, gridSize );
             GridTraverser3DBoundaryAlongYZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongYZ, cudaBlockSize, 0, s5 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
@@ -451,21 +450,21 @@ processEntities(
       dim3 cudaBlockSize( 8, 8, 8 );
       dim3 cudaBlocksCount, cudaGridsCount;
       
-      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
-                                   end.x() - begin.x() + 1,
-                                   end.y() - begin.y() + 1,
-                                   end.z() - begin.z() + 1 );
+      Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
+                          end.x() - begin.x() + 1,
+                          end.y() - begin.y() + 1,
+                          end.z() - begin.z() + 1 );
 
-      auto& pool = CudaStreamPool::getInstance();
+      auto& pool = Cuda::StreamPool::getInstance();
       const cudaStream_t& s = pool.getStream( stream );
 
-      Devices::Cuda::synchronizeDevice();
+      Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
       dim3 gridIdx, gridSize;
       for( gridIdx.z = 0; gridIdx.z < cudaGridsCount.z; gridIdx.z ++ )
          for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ )
             for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ )
             {
-               Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, gridSize );
+               Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, gridSize );
                GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< gridSize, cudaBlockSize, 0, s >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
@@ -488,68 +487,5 @@ processEntities(
 #endif
 }
 
-/****
- * 3D traverser, MIC
- */
-template< typename Real,
-          typename Index >
-   template<
-      typename GridEntity,
-      typename EntitiesProcessor,
-      typename UserData,
-      bool processOnlyBoundaryEntities,
-         int XOrthogonalBoundary,
-         int YOrthogonalBoundary,
-         int ZOrthogonalBoundary,
-      typename... GridEntityParameters >
-void
-GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >::
-processEntities(
-   const GridPointer& gridPointer,
-   const CoordinatesType& begin,
-   const CoordinatesType& end,
-   UserData& userData,
-   GridTraverserMode mode,
-   const int& stream,
-   const GridEntityParameters&... gridEntityParameters )
-{
-    throw Exceptions::NotImplementedError("Not Implemented yet Grid Traverser <3, Real, Device::MIC>");
-    
-/* HAVE_CUDA   
-   dim3 cudaBlockSize( 8, 8, 8 );
-   dim3 cudaBlocks;
-   cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
-   cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y );
-   cudaBlocks.z = Devices::Cuda::getNumberOfBlocks( end.z() - begin.z() + 1, cudaBlockSize.z );
-   const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
-   const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y );
-   const IndexType cudaZGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.z );
-
-   auto& pool = CudaStreamPool::getInstance();
-   const cudaStream_t& s = pool.getStream( stream );
-
-   Devices::Cuda::synchronizeDevice();
-   for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
-      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
-         for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
-            GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
-               <<< cudaBlocks, cudaBlockSize, 0, s >>>
-               ( &gridPointer.template getData< Devices::Cuda >(),
-                 userData,
-                 begin,
-                 end,
-                 gridXIdx,
-                 gridYIdx,
-                 gridZIdx,
-                 gridEntityParameters... );
-
-   // only launches into the stream 0 are synchronized
-   if( stream == 0 )
-   {
-      cudaStreamSynchronize( s );
-      TNL_CHECK_CUDA_DEVICE;
-   }
- */
-}
-   } // namespace Meshes
+} // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
index dd9562add377b02c3ab9ca91fa4804762182b46c..29fe8ffd67d0e7e0550abd6540391d24d5a0205d 100644
--- a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Meshes/DimensionTag.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 #include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h
index 84a9c56d9389f31c013206c10f63fceb81ec2e0c..f7a3cc180fa3da51b8830f8001e8d616e621f891 100644
--- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Assert.h>
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 
 namespace TNL {
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h
index b8983feed92441a040ab8cab310afa36e8ef1b84..840a201c6021448e4f0de99552c4918364f92874 100644
--- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h
@@ -14,7 +14,7 @@
 #include <TNL/Meshes/GridDetails/Grid1D.h>
 #include <TNL/Meshes/GridDetails/Grid2D.h>
 #include <TNL/Meshes/GridDetails/Grid3D.h>
-#include <TNL/TemplateStaticFor.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
 
 namespace TNL {
 namespace Meshes {
@@ -176,7 +176,7 @@ class NeighborGridEntityGetter<
       void refresh( const GridType& grid, const IndexType& entityIndex )
       {
 #ifndef HAVE_CUDA  // TODO: fix it -- does not work with nvcc
-         TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilRefresher >::exec( *this, entityIndex );
 #endif
       };
  
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h
index 5b0e48767f6b530388fd0dd92612c12b3b0fcbb8..d6f4ab24eddf9b016060925dcd822469b9fcb741 100644
--- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h
@@ -199,9 +199,9 @@ class NeighborGridEntityGetter<
       void refresh( const GridType& grid, const IndexType& entityIndex )
       {
 #ifndef HAVE_CUDA // TODO: fix this to work with CUDA
-         TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex );
-         TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex );
-         TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex );
 #endif
       };
  
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h
index 5fe5329bb3284ed578f502ad6828a84842fcc5eb..3cf2bb8d13e9121cfa4cd683282d78687bf4e446 100644
--- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h
@@ -14,7 +14,7 @@
 #include <TNL/Meshes/GridDetails/Grid1D.h>
 #include <TNL/Meshes/GridDetails/Grid2D.h>
 #include <TNL/Meshes/GridDetails/Grid3D.h>
-#include <TNL/TemplateStaticFor.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
 
 namespace TNL {
 namespace Meshes {
@@ -223,11 +223,11 @@ class NeighborGridEntityGetter<
       void refresh( const GridType& grid, const IndexType& entityIndex )
       {
 #ifndef HAVE_CUDA // TODO: fix this to work with CUDA
-         TemplateStaticFor< IndexType, -stencilSize, 0, StencilZRefresher >::exec( *this, entityIndex );
-         TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilZRefresher >::exec( *this, entityIndex );
-         TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex );
-         TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex );
-         TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilZRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilZRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex );
+         Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex );
 #endif
       };
  
diff --git a/src/TNL/Meshes/Mesh.h b/src/TNL/Meshes/Mesh.h
index 589a862b9228bfbc7b1108c3caa51af6303532c0..4d71e3ac91231e3741ee61bafe06e12ff99320a2 100644
--- a/src/TNL/Meshes/Mesh.h
+++ b/src/TNL/Meshes/Mesh.h
@@ -79,10 +79,6 @@ class Mesh
       using RealType        = typename PointType::RealType;
       using GlobalIndexVector = Containers::Vector< GlobalIndexType, DeviceType, GlobalIndexType >;
 
-      // shortcuts, compatibility with grids
-      using HostType = Mesh< MeshConfig, Devices::Host >;
-      using CudaType = Mesh< MeshConfig, Devices::Cuda >;
-
       template< int Dimension >
       using EntityTraits = typename MeshTraitsType::template EntityTraits< Dimension >;
 
@@ -110,10 +106,6 @@ class Mesh
       using Face = EntityType< getMeshDimension() - 1 >;
       using Vertex = EntityType< 0 >;
 
-      static String getType();
-
-      virtual String getTypeVirtual() const;
-
       static String getSerializationType();
 
       virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h
index 7630a2d64920de41dbbc3f3f2e17603d2d3aabc1..c956d3169a659dbb106a2a04e83ce39984a84646 100644
--- a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h
+++ b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h
@@ -28,8 +28,8 @@
 #include <TNL/Meshes/DimensionTag.h>
 #include <TNL/Meshes/Mesh.h>
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/ParallelFor.h>
-#include <TNL/TemplateStaticFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
 
 namespace TNL {
 namespace Meshes {
@@ -69,10 +69,10 @@ public:
             subentity.template bindSuperentitiesStorageNetwork< SuperdimensionTag::value >( superentitiesStorage->getValues( i ) );
          };
 
-         ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
-                                          kernel,
-                                          &meshPointer.template modifyData< DeviceType >(),
-                                          &superentitiesStoragePointer.template modifyData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
+                                                      kernel,
+                                                      &meshPointer.template modifyData< DeviceType >(),
+                                                      &superentitiesStoragePointer.template modifyData< DeviceType >() );
       }
    };
 
@@ -109,10 +109,10 @@ public:
             superentity.template bindSubentitiesStorageNetwork< DimensionTag::value >( subentitiesStorage->getValues( i ) );
          };
 
-         ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
-                                          kernel,
-                                          &meshPointer.template modifyData< DeviceType >(),
-                                          &subentitiesStoragePointer.template modifyData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
+                                                      kernel,
+                                                      &meshPointer.template modifyData< DeviceType >(),
+                                                      &subentitiesStoragePointer.template modifyData< DeviceType >() );
       }
    };
 
@@ -144,14 +144,14 @@ public:
 
       static void exec( Mesh& mesh )
       {
-         TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, Inner >::execHost( mesh );
+         Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, Inner >::execHost( mesh );
       }
    };
 
 public:
    static void exec( Mesh& mesh )
    {
-      TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, OuterLoop >::execHost( mesh );
+      Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, OuterLoop >::execHost( mesh );
    }
 };
 
diff --git a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h
index c98f145d0876b38c25ff7a77368de7b33f6f0656..bd9c02411283241b41a78fef4c008a71772f1ea6 100644
--- a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h
+++ b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h
@@ -156,27 +156,27 @@ public:
       };
 
       Pointers::DevicePointer< Mesh > meshPointer( mesh );
-      ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
-                                       kernel1,
-                                       &meshPointer.template getData< DeviceType >(),
-                                       entities.getData(),
-                                       perm.getData() );
-      ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
-                                       kernel2,
-                                       &meshPointer.template modifyData< DeviceType >(),
-                                       entities.getData() );
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
+                                                   kernel1,
+                                                   &meshPointer.template getData< DeviceType >(),
+                                                   entities.getData(),
+                                                   perm.getData() );
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
+                                                   kernel2,
+                                                   &meshPointer.template modifyData< DeviceType >(),
+                                                   entities.getData() );
 
       // permute superentities storage
-      TemplateStaticFor< int, 0, Dimension, SubentitiesStorageWorker >::execHost( mesh, perm );
+      Algorithms::TemplateStaticFor< int, 0, Dimension, SubentitiesStorageWorker >::execHost( mesh, perm );
 
       // permute subentities storage
-      TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesStorageWorker >::execHost( mesh, perm );
+      Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesStorageWorker >::execHost( mesh, perm );
 
       // update superentity indices from the subentities
-      TemplateStaticFor< int, 0, Dimension, SubentitiesWorker >::execHost( mesh, iperm );
+      Algorithms::TemplateStaticFor< int, 0, Dimension, SubentitiesWorker >::execHost( mesh, iperm );
 
       // update subentity indices from the superentities
-      TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesWorker >::execHost( mesh, iperm );
+      Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesWorker >::execHost( mesh, iperm );
    }
 };
 
diff --git a/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h b/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h
index 64485dc3c7ecb31ebdeb9891830244776550e315..110fa9eefc1ca498435ce2fd11d1d84df2ad4410 100644
--- a/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h
+++ b/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h
@@ -17,7 +17,7 @@
 #pragma once
 
 #include <TNL/Assert.h>
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 
 namespace TNL {
 namespace Meshes {
diff --git a/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h b/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h
index e14e909a52d6824192e66e8d93d469a877ae70ca..80340c62cc0ff15104843b0ce7f3b1e6df7424e0 100644
--- a/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h
+++ b/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h
@@ -26,8 +26,8 @@ namespace Meshes {
 template< typename MeshConfig, typename EntityTopology >
 class MeshEntityReferenceOrientation
 {
-	typedef typename MeshTraits< MeshConfig >::LocalIndexType  LocalIndexType;
-	typedef typename MeshTraits< MeshConfig >::GlobalIndexType GlobalIndexType;
+   typedef typename MeshTraits< MeshConfig >::LocalIndexType  LocalIndexType;
+   typedef typename MeshTraits< MeshConfig >::GlobalIndexType GlobalIndexType;
 
    public:
       typedef EntitySeed< MeshConfig, EntityTopology >            SeedType;
@@ -45,8 +45,6 @@ class MeshEntityReferenceOrientation
             this->cornerIdsMap.insert( std::make_pair( referenceCornerIds[i], i ) );
          }
       }
- 
-      static String getType(){ return "MeshEntityReferenceOrientation"; };
 
       EntityOrientation createOrientation( const SeedType& seed ) const
       {
@@ -67,4 +65,3 @@ class MeshEntityReferenceOrientation
 
 } // namespace Meshes
 } // namespace TNL
-
diff --git a/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h b/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h
index 8bdd40570ab492484a2784791e1061a6f6fed6b5..5c7414b422976804cf819f266a312ad0d65cf40a 100644
--- a/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h
+++ b/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h
@@ -80,11 +80,11 @@ template< typename MeshConfig,
           typename EntityTopology >
 String
 MeshEntity< MeshConfig, Device, EntityTopology >::
-getType()
+getSerializationType()
 {
-   return String( "MeshEntity< " ) +
-          MeshConfig::getType() + ", " +
-          EntityTopology::getType() + " >";
+   return String( "MeshEntity<" ) +
+          TNL::getSerializationType< MeshConfig >() + ", " +
+          TNL::getSerializationType< EntityTopology >() + ">";
 }
 
 template< typename MeshConfig,
@@ -92,9 +92,9 @@ template< typename MeshConfig,
           typename EntityTopology >
 String
 MeshEntity< MeshConfig, Device, EntityTopology >::
-getTypeVirtual() const
+getSerializationTypeVirtual() const
 {
-   return this->getType();
+   return this->getSerializationType();
 }
 
 template< typename MeshConfig,
@@ -242,17 +242,19 @@ operator=( const MeshEntity< MeshConfig, Device_, Topologies::Vertex >& entity )
 template< typename MeshConfig, typename Device >
 String
 MeshEntity< MeshConfig, Device, Topologies::Vertex >::
-getType()
+getSerializationType()
 {
-   return String( "MeshEntity< ... >" );
+   return String( "MeshEntity<" ) +
+          TNL::getSerializationType< MeshConfig >() + ", " +
+          TNL::getSerializationType< Topologies::Vertex >() + ">";
 }
 
 template< typename MeshConfig, typename Device >
 String
 MeshEntity< MeshConfig, Device, Topologies::Vertex >::
-getTypeVirtual() const
+getSerializationTypeVirtual() const
 {
-   return this->getType();
+   return this->getSerializationType();
 }
 
 template< typename MeshConfig, typename Device >
diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h
index 21ccd0ccd4c2b82979d4d636b7ec9a039fcd4175..f29fec33e8473afce3b90ccc12bcfd3c37cac7d9 100644
--- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h
+++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h
@@ -10,8 +10,8 @@
 
 #pragma once
 
-#include <TNL/TemplateStaticFor.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Pointers/DevicePointer.h>
 #include <TNL/Meshes/DimensionTag.h>
 #include <TNL/Meshes/MeshDetails/traits/MeshEntityTraits.h>
@@ -121,8 +121,8 @@ public:
    public:
       static void exec( Mesh& mesh )
       {
-         TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, SetEntitiesCount >::execHost( mesh );
-         TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, ResetBoundaryTags >::execHost( mesh );
+         Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, SetEntitiesCount >::execHost( mesh );
+         Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, ResetBoundaryTags >::execHost( mesh );
 
          auto kernel = [] __cuda_callable__
             ( GlobalIndexType faceIndex,
@@ -136,17 +136,17 @@ public:
                const GlobalIndexType cellIndex = face.template getSuperentityIndex< Mesh::getMeshDimension() >( 0 );
                mesh->template setIsBoundaryEntity< Mesh::getMeshDimension() >( cellIndex, true );
                // initialize all subentities
-               TemplateStaticFor< int, 0, Mesh::getMeshDimension() - 1, InitializeSubentities >::exec( *mesh, faceIndex, face );
+               Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() - 1, InitializeSubentities >::exec( *mesh, faceIndex, face );
             }
          };
 
          const GlobalIndexType facesCount = mesh.template getEntitiesCount< Mesh::getMeshDimension() - 1 >();
          Pointers::DevicePointer< Mesh > meshPointer( mesh );
-         ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount,
-                                          kernel,
-                                          &meshPointer.template modifyData< DeviceType >() );
+         Algorithms::ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount,
+                                                      kernel,
+                                                      &meshPointer.template modifyData< DeviceType >() );
 
-         TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, UpdateBoundaryIndices >::execHost( mesh );
+         Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, UpdateBoundaryIndices >::execHost( mesh );
       }
    };
 
diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h
index 873475bcab18a4bc1003843a527b2337d69328e3..e31c76dae0a4ebd5555cd0340c4d732e49451e02 100644
--- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h
+++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h
@@ -116,8 +116,8 @@ public:
       }
       // TODO: parallelize directly on the device
       else {
-         using BoundaryTagsHostArray = typename BoundaryTagsArray::HostType;
-         using OrderingHostArray     = typename OrderingArray::HostType;
+         using BoundaryTagsHostArray = typename BoundaryTagsArray::template Self< typename BoundaryTagsArray::ValueType, Devices::Host >;
+         using OrderingHostArray     = typename OrderingArray::template Self< typename OrderingArray::ValueType, Devices::Host >;
 
          BoundaryTagsHostArray hostBoundaryTags;
          OrderingHostArray hostBoundaryIndices;
diff --git a/src/TNL/Meshes/MeshDetails/Mesh_impl.h b/src/TNL/Meshes/MeshDetails/Mesh_impl.h
index 4fbd3ba07a01b79d054720e39a3abf43327f493c..4b0488b2d05f1cb9a36cd4ee157f0b355d679d63 100644
--- a/src/TNL/Meshes/MeshDetails/Mesh_impl.h
+++ b/src/TNL/Meshes/MeshDetails/Mesh_impl.h
@@ -91,28 +91,12 @@ getMeshDimension()
    return MeshTraitsType::meshDimension;
 }
 
-template< typename MeshConfig, typename Device >
-String
-Mesh< MeshConfig, Device >::
-getType()
-{
-   return String( "Meshes::Mesh< ") + MeshConfig::getType() + " >";
-}
-
-template< typename MeshConfig, typename Device >
-String
-Mesh< MeshConfig, Device >::
-getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename MeshConfig, typename Device >
 String
 Mesh< MeshConfig, Device >::
 getSerializationType()
 {
-   return Mesh::getType();
+   return String( "Meshes::Mesh< ") + TNL::getType< MeshConfig >() + " >";
 }
 
 template< typename MeshConfig, typename Device >
diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h
index 5dedf58fd2ceea521e51ece53e42d0efd65caec1..2ce07addfff74fc55fd26981cc97f8704341aaf2 100644
--- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h
+++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h
@@ -11,8 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Traverser.h>
-
-#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Meshes {
@@ -20,221 +19,87 @@ namespace Meshes {
 template< typename Mesh,
           typename MeshEntity,
           int EntitiesDimension >
-   template< typename UserData,
-             typename EntitiesProcessor >
+   template< typename EntitiesProcessor,
+             typename UserData >
 void
 Traverser< Mesh, MeshEntity, EntitiesDimension >::
 processBoundaryEntities( const MeshPointer& meshPointer,
-                         UserData& userData ) const
+                         UserData userData ) const
 {
-   auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >();
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-   for( decltype(entitiesCount) i = 0; i < entitiesCount; i++ ) {
-      const auto entityIndex = meshPointer->template getBoundaryEntityIndex< EntitiesDimension >( i );
-      auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex );
+   const GlobalIndexType entitiesCount = meshPointer->template getBoundaryEntitiesCount< MeshEntity::getEntityDimension() >();
+   auto kernel = [] __cuda_callable__
+      ( const GlobalIndexType i,
+        const Mesh* mesh,
+        UserData userData )
+   {
+      const GlobalIndexType entityIndex = mesh->template getBoundaryEntityIndex< MeshEntity::getEntityDimension() >( i );
+      auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *meshPointer, userData, entity );
-   }
+      EntitiesProcessor::processEntity( *mesh, userData, entity );
+   };
+   Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
+   Algorithms::ParallelFor< DeviceType >::exec(
+         (GlobalIndexType) 0, entitiesCount,
+         kernel,
+         &meshPointer.template getData< DeviceType >(),
+         userData );
 }
 
 template< typename Mesh,
           typename MeshEntity,
           int EntitiesDimension >
-   template< typename UserData,
-             typename EntitiesProcessor >
+   template< typename EntitiesProcessor,
+             typename UserData >
 void
 Traverser< Mesh, MeshEntity, EntitiesDimension >::
 processInteriorEntities( const MeshPointer& meshPointer,
-                         UserData& userData ) const
+                         UserData userData ) const
 {
-   auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >();
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-   for( decltype(entitiesCount) i = 0; i < entitiesCount; i++ ) {
-      const auto entityIndex = meshPointer->template getInteriorEntityIndex< EntitiesDimension >( i );
-      auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex );
+   const auto entitiesCount = meshPointer->template getInteriorEntitiesCount< MeshEntity::getEntityDimension() >();
+   auto kernel = [] __cuda_callable__
+      ( const GlobalIndexType i,
+        const Mesh* mesh,
+        UserData userData )
+   {
+      const GlobalIndexType entityIndex = mesh->template getInteriorEntityIndex< MeshEntity::getEntityDimension() >( i );
+      auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *meshPointer, userData, entity );
-   }
+      EntitiesProcessor::processEntity( *mesh, userData, entity );
+   };
+   Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
+   Algorithms::ParallelFor< DeviceType >::exec(
+         (GlobalIndexType) 0, entitiesCount,
+         kernel,
+         &meshPointer.template getData< DeviceType >(),
+         userData );
 }
 
 template< typename Mesh,
           typename MeshEntity,
           int EntitiesDimension >
-   template< typename UserData,
-             typename EntitiesProcessor >
+   template< typename EntitiesProcessor,
+             typename UserData >
 void
 Traverser< Mesh, MeshEntity, EntitiesDimension >::
 processAllEntities( const MeshPointer& meshPointer,
-                    UserData& userData ) const
+                    UserData userData ) const
 {
-   auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >();
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-   for( decltype(entitiesCount) entityIndex = 0; entityIndex < entitiesCount; entityIndex++ ) {
-      auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex );
-      // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *meshPointer, userData, entity );
-   }
-}
-
-
-#ifdef HAVE_CUDA
-template< int EntitiesDimension,
-          typename EntitiesProcessor,
-          typename Mesh,
-          typename UserData >
-__global__ void
-MeshTraverserBoundaryEntitiesKernel( const Mesh* mesh,
-                                     UserData userData,
-                                     typename Mesh::GlobalIndexType entitiesCount )
-{
-   for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x;
-        i < entitiesCount;
-        i += blockDim.x * gridDim.x )
+   const auto entitiesCount = meshPointer->template getEntitiesCount< MeshEntity::getEntityDimension() >();
+   auto kernel = [] __cuda_callable__
+      ( const GlobalIndexType entityIndex,
+        const Mesh* mesh,
+        UserData userData )
    {
-      const auto entityIndex = mesh->template getBoundaryEntityIndex< EntitiesDimension >( i );
-      auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex );
+      auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
       EntitiesProcessor::processEntity( *mesh, userData, entity );
-   }
-}
-
-template< int EntitiesDimension,
-          typename EntitiesProcessor,
-          typename Mesh,
-          typename UserData >
-__global__ void
-MeshTraverserInteriorEntitiesKernel( const Mesh* mesh,
-                                     UserData userData,
-                                     typename Mesh::GlobalIndexType entitiesCount )
-{
-   for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x;
-        i < entitiesCount;
-        i += blockDim.x * gridDim.x )
-   {
-      const auto entityIndex = mesh->template getInteriorEntityIndex< EntitiesDimension >( i );
-      auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex );
-      // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *mesh, userData, entity );
-   }
-}
-
-template< int EntitiesDimension,
-          typename EntitiesProcessor,
-          typename Mesh,
-          typename UserData >
-__global__ void
-MeshTraverserAllEntitiesKernel( const Mesh* mesh,
-                                UserData userData,
-                                typename Mesh::GlobalIndexType entitiesCount )
-{
-   for( typename Mesh::GlobalIndexType entityIndex = blockIdx.x * blockDim.x + threadIdx.x;
-        entityIndex < entitiesCount;
-        entityIndex += blockDim.x * gridDim.x )
-   {
-      auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex );
-      // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *mesh, userData, entity );
-   }
-}
-#endif
-
-template< typename MeshConfig,
-          typename MeshEntity,
-          int EntitiesDimension >
-   template< typename UserData,
-             typename EntitiesProcessor >
-void
-Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >::
-processBoundaryEntities( const MeshPointer& meshPointer,
-                         UserData& userData ) const
-{
-#ifdef HAVE_CUDA
-   auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >();
-
-   dim3 blockSize( 256 );
-   dim3 gridSize;
-   const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-   gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) );
-
-   Devices::Cuda::synchronizeDevice();
-   MeshTraverserBoundaryEntitiesKernel< EntitiesDimension, EntitiesProcessor >
-      <<< gridSize, blockSize >>>
-      ( &meshPointer.template getData< Devices::Cuda >(),
-        userData,
-        entitiesCount );
-   cudaDeviceSynchronize();
-   TNL_CHECK_CUDA_DEVICE;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-template< typename MeshConfig,
-          typename MeshEntity,
-          int EntitiesDimension >
-   template< typename UserData,
-             typename EntitiesProcessor >
-void
-Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >::
-processInteriorEntities( const MeshPointer& meshPointer,
-                         UserData& userData ) const
-{
-#ifdef HAVE_CUDA
-   auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >();
-
-   dim3 blockSize( 256 );
-   dim3 gridSize;
-   const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-   gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) );
-
-   Devices::Cuda::synchronizeDevice();
-   MeshTraverserInteriorEntitiesKernel< EntitiesDimension, EntitiesProcessor >
-      <<< gridSize, blockSize >>>
-      ( &meshPointer.template getData< Devices::Cuda >(),
-        userData,
-        entitiesCount );
-   cudaDeviceSynchronize();
-   TNL_CHECK_CUDA_DEVICE;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
-}
-
-template< typename MeshConfig,
-          typename MeshEntity,
-          int EntitiesDimension >
-   template< typename UserData,
-             typename EntitiesProcessor >
-void
-Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >::
-processAllEntities( const MeshPointer& meshPointer,
-                    UserData& userData ) const
-{
-#ifdef HAVE_CUDA
-   auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >();
-
-   dim3 blockSize( 256 );
-   dim3 gridSize;
-   const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-   gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) );
-
-   Devices::Cuda::synchronizeDevice();
-   MeshTraverserAllEntitiesKernel< EntitiesDimension, EntitiesProcessor >
-      <<< gridSize, blockSize >>>
-      ( &meshPointer.template getData< Devices::Cuda >(),
-        userData,
-        entitiesCount );
-   cudaDeviceSynchronize();
-   TNL_CHECK_CUDA_DEVICE;
-#else
-   throw Exceptions::CudaSupportMissing();
-#endif
+   };
+   Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
+   Algorithms::ParallelFor< DeviceType >::exec(
+         (GlobalIndexType) 0, entitiesCount,
+         kernel,
+         &meshPointer.template getData< DeviceType >(),
+         userData );
 }
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h
index 7058691a301d569fdb8e230ba2889f4929e409c9..ada83b5fb0cf3a3af6d67e5559746d4743c5f71d 100644
--- a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h
+++ b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h
@@ -16,8 +16,6 @@
 
 #pragma once
 
-#include <TNL/TemplateStaticFor.h>
-
 #include <TNL/Meshes/MeshDetails/initializer/EntitySeed.h>
 #include <TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h>
 #include <TNL/Meshes/MeshDetails/initializer/SuperentityStorageInitializer.h>
@@ -68,8 +66,6 @@ class EntityInitializer
    using InitializerType  = Initializer< MeshConfig >;
 
 public:
-   static String getType() { return "EntityInitializer"; };
-
    static void initEntity( EntityType& entity, const GlobalIndexType& entityIndex, const SeedType& entitySeed, InitializerType& initializer)
    {
       initializer.setEntityIndex( entity, entityIndex );
@@ -91,8 +87,6 @@ public:
    using PointType       = typename MeshTraits< MeshConfig >::PointType;
    using InitializerType = Initializer< MeshConfig >;
 
-   static String getType() { return "EntityInitializer"; };
-
    static void initEntity( VertexType& entity, const GlobalIndexType& entityIndex, const PointType& point, InitializerType& initializer)
    {
       initializer.setEntityIndex( entity, entityIndex );
diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h b/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h
index b9d34d070114bf40f71d79df1a0d266f545d7347..41439c4056905523596b9bbbf76df035f14ca4cd 100644
--- a/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h
+++ b/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h
@@ -40,8 +40,6 @@ class EntitySeed
       using HashType        = EntitySeedHash< EntitySeed >;
       using KeyEqual        = EntitySeedEq< EntitySeed >;
 
-      static String getType() { return String( "EntitySeed<>" ); }
-
       static constexpr LocalIndexType getCornersCount()
       {
          return SubvertexTraits::count;
@@ -82,8 +80,6 @@ class EntitySeed< MeshConfig, Topologies::Vertex >
       using HashType        = EntitySeedHash< EntitySeed >;
       using KeyEqual        = EntitySeedEq< EntitySeed >;
 
-      static String getType() { return String( "EntitySeed<>" ); }
-
       static constexpr LocalIndexType getCornersCount()
       {
          return 1;
diff --git a/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h b/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h
index 8ad08bd63d3b9a6a1be186ff73baa34370da2d28..30cbb31e6a960b2cd00e8faafe5262b7ef17e472 100644
--- a/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h
+++ b/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <TNL/TemplateStaticFor.h>
+#include <TNL/Algorithms/TemplateStaticFor.h>
 #include <TNL/Meshes/MeshDetails/traits/MeshTraits.h>
 
 namespace TNL {
@@ -47,7 +47,7 @@ public:
    static SubentitySeedArray create( const SubvertexAccessorType& subvertices )
    {
       SubentitySeedArray subentitySeeds;
-      TemplateStaticFor< LocalIndexType, 0, SUBENTITIES_COUNT, CreateSubentitySeeds >::execHost( subentitySeeds, subvertices );
+      Algorithms::TemplateStaticFor< LocalIndexType, 0, SUBENTITIES_COUNT, CreateSubentitySeeds >::execHost( subentitySeeds, subvertices );
 
       return subentitySeeds;
    }
@@ -61,7 +61,7 @@ private:
       public:
          static void exec( SubentitySeedArray& subentitySeeds, const SubvertexAccessorType& subvertices )
          {
-            TemplateStaticFor< LocalIndexType, 0, SUBENTITY_VERTICES_COUNT, SetSubentitySeedVertex >::execHost( subentitySeeds[ subentityIndex ], subvertices );
+            Algorithms::TemplateStaticFor< LocalIndexType, 0, SUBENTITY_VERTICES_COUNT, SetSubentitySeedVertex >::execHost( subentitySeeds[ subentityIndex ], subvertices );
          }
 
       private:
diff --git a/src/TNL/Meshes/MeshEntity.h b/src/TNL/Meshes/MeshEntity.h
index 6e0970ade32564f03aa16380f86164c109a79cb7..b1c8afea57430ae06a7b59c6d9aba6c495b26017 100644
--- a/src/TNL/Meshes/MeshEntity.h
+++ b/src/TNL/Meshes/MeshEntity.h
@@ -72,9 +72,9 @@ class MeshEntity
       MeshEntity& operator=( const MeshEntity< MeshConfig, Device_, EntityTopology >& entity );
 
 
-      static String getType();
+      static String getSerializationType();
 
-      String getTypeVirtual() const;
+      String getSerializationTypeVirtual() const;
 
       void save( File& file ) const;
 
@@ -166,9 +166,9 @@ class MeshEntity< MeshConfig, Device, Topologies::Vertex >
       MeshEntity& operator=( const MeshEntity< MeshConfig, Device_, EntityTopology >& entity );
 
 
-      static String getType();
+      static String getSerializationType();
 
-      String getTypeVirtual() const;
+      String getSerializationTypeVirtual() const;
 
       void save( File& file ) const;
 
diff --git a/src/TNL/Meshes/Topologies/Edge.h b/src/TNL/Meshes/Topologies/Edge.h
index 4c4b8269e51a7de9f709f28868b6fc09d517f757..298e638c0badc54f50921e058a348a4f829557bb 100644
--- a/src/TNL/Meshes/Topologies/Edge.h
+++ b/src/TNL/Meshes/Topologies/Edge.h
@@ -22,15 +22,10 @@
 namespace TNL {
 namespace Meshes {
 namespace Topologies {
-   
+
 struct Edge
 {
    static constexpr int dimension = 1;
-
-   static String getType()
-   {
-      return "Topologies::Edge";
-   }
 };
 
 
diff --git a/src/TNL/Meshes/Topologies/Hexahedron.h b/src/TNL/Meshes/Topologies/Hexahedron.h
index db922f3b5bc487a7160e74c471597826e1461584..af0765db510d569fb264f5845074298d5ca69809 100644
--- a/src/TNL/Meshes/Topologies/Hexahedron.h
+++ b/src/TNL/Meshes/Topologies/Hexahedron.h
@@ -25,11 +25,6 @@ namespace Topologies {
 struct Hexahedron
 {
    static constexpr int dimension = 3;
-
-   static String getType()
-   {
-      return "Topologies::Hexahedron";
-   }
 };
 
 template<>
diff --git a/src/TNL/Meshes/Topologies/Quadrilateral.h b/src/TNL/Meshes/Topologies/Quadrilateral.h
index 6b5d4eb54b68f01f6fef9932cd66c3c897d1d72d..50be274e2e4d9671c79cf974ac4ac509893f2fb0 100644
--- a/src/TNL/Meshes/Topologies/Quadrilateral.h
+++ b/src/TNL/Meshes/Topologies/Quadrilateral.h
@@ -25,11 +25,6 @@ namespace Topologies {
 struct Quadrilateral
 {
    static constexpr int dimension = 2;
-
-   static String getType()
-   {
-      return "Topologies::Quadrilateral";
-   }
 };
 
 
diff --git a/src/TNL/Meshes/Topologies/Simplex.h b/src/TNL/Meshes/Topologies/Simplex.h
index a9cbee72ba890f971611066c4908a0ffb0f55f29..3b61f09fdec62d0f9613f8c56db5cc22e1080180 100644
--- a/src/TNL/Meshes/Topologies/Simplex.h
+++ b/src/TNL/Meshes/Topologies/Simplex.h
@@ -25,15 +25,9 @@ namespace Meshes {
 namespace Topologies {
 
 template< int dimension_ >
-class Simplex
+struct Simplex
 {
-   public:
-      static constexpr int dimension = dimension_;
-
-      static String getType()
-      {
-         return String( "Topologies::Simplex< " ) + convertToString( dimension ) + " >";
-      }
+   static constexpr int dimension = dimension_;
 };
 
 namespace SimplexDetails {
diff --git a/src/TNL/Meshes/Topologies/Tetrahedron.h b/src/TNL/Meshes/Topologies/Tetrahedron.h
index 7722f5ef688ad41cd3b594e8ec3a5484aae17b9e..048daa1c3c3fe7cc112489e2d40411c4f4ad47b2 100644
--- a/src/TNL/Meshes/Topologies/Tetrahedron.h
+++ b/src/TNL/Meshes/Topologies/Tetrahedron.h
@@ -25,11 +25,6 @@ namespace Topologies {
 struct Tetrahedron
 {
    static constexpr int dimension = 3;
-
-   static String getType()
-   {
-      return "Topologies::Tetrahedron";
-   }
 };
 
 template<>
diff --git a/src/TNL/Meshes/Topologies/Triangle.h b/src/TNL/Meshes/Topologies/Triangle.h
index 11d1c8a846c360f22d46fb676f411146d33662bd..efe031059d1fa5e7705dc131d69de4a862743ed7 100644
--- a/src/TNL/Meshes/Topologies/Triangle.h
+++ b/src/TNL/Meshes/Topologies/Triangle.h
@@ -25,11 +25,6 @@ namespace Topologies {
 struct Triangle
 {
    static constexpr int dimension = 2;
-
-   static String getType()
-   {
-      return "Topologies::Triangle";
-   }
 };
 
 
diff --git a/src/TNL/Meshes/Topologies/Vertex.h b/src/TNL/Meshes/Topologies/Vertex.h
index cff78e37d6e64f3798791e26510864aaed44955c..f90127624806c4acbfb8e0ee2aa01b5988f0c3f4 100644
--- a/src/TNL/Meshes/Topologies/Vertex.h
+++ b/src/TNL/Meshes/Topologies/Vertex.h
@@ -25,11 +25,6 @@ namespace Topologies {
 struct Vertex
 {
    static constexpr int dimension = 0;
-
-   static String getType()
-   {
-      return "Topologies::Vertex";
-   }
 };
 
 } // namespace Topologies
diff --git a/src/TNL/Meshes/Traverser.h b/src/TNL/Meshes/Traverser.h
index 017084ae8d39f03927b5a038a62776abe7a1f588..f157e3afcd56fb6ffebf95865d7546490fb00668 100644
--- a/src/TNL/Meshes/Traverser.h
+++ b/src/TNL/Meshes/Traverser.h
@@ -18,54 +18,30 @@ namespace Meshes {
 
 template< typename Mesh,
           typename MeshEntity,
+          // extra parameter which is used only for specializations implementing grid traversers
           int EntitiesDimension = MeshEntity::getEntityDimension() >
 class Traverser
 {
    public:
       using MeshType = Mesh;
-      using MeshPointer = Pointers::SharedPointer<  MeshType >;
+      using MeshPointer = Pointers::SharedPointer< MeshType >;
       using DeviceType = typename MeshType::DeviceType;
+      using GlobalIndexType = typename MeshType::GlobalIndexType;
 
-      template< typename UserData,
-                typename EntitiesProcessor >
+      template< typename EntitiesProcessor,
+                typename UserData >
       void processBoundaryEntities( const MeshPointer& meshPointer,
-                                    UserData& userData ) const;
+                                    UserData userData ) const;
 
-      template< typename UserData,
-                typename EntitiesProcessor >
+      template< typename EntitiesProcessor,
+                typename UserData >
       void processInteriorEntities( const MeshPointer& meshPointer,
-                                    UserData& userData ) const;
+                                    UserData userData ) const;
 
-      template< typename UserData,
-                typename EntitiesProcessor >
+      template< typename EntitiesProcessor,
+                typename UserData >
       void processAllEntities( const MeshPointer& meshPointer,
-                               UserData& userData ) const;
-};
-
-template< typename MeshConfig,
-          typename MeshEntity,
-          int EntitiesDimension >
-class Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >
-{
-   public:
-      using MeshType = Mesh< MeshConfig, Devices::Cuda >;
-      using MeshPointer = Pointers::SharedPointer<  MeshType >;
-      using DeviceType = typename MeshType::DeviceType;
-
-      template< typename UserData,
-                typename EntitiesProcessor >
-      void processBoundaryEntities( const MeshPointer& meshPointer,
-                                       UserData& userData ) const;
-
-      template< typename UserData,
-                typename EntitiesProcessor >
-      void processInteriorEntities( const MeshPointer& meshPointer,
-                                    UserData& userData ) const;
-
-      template< typename UserData,
-                typename EntitiesProcessor >
-      void processAllEntities( const MeshPointer& meshPointer,
-                               UserData& userData ) const;
+                               UserData userData ) const;
 };
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h
index 46c4e9c58f30f3d7abf9c04de6071d64f5e34977..b92148fa90ddf5c0d8a46e90aa817d9b8531ffd2 100644
--- a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h
+++ b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h
@@ -74,7 +74,7 @@ MeshTypeResolver< Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs...
 resolveWorldDimension( const Reader& reader,
                        ProblemSetterArgs&&... problemSetterArgs )
 {
-   std::cerr << "The cell topology " << CellTopology::getType() << " is disabled in the build configuration." << std::endl;
+   std::cerr << "The cell topology " << getType< CellTopology >() << " is disabled in the build configuration." << std::endl;
    return false;
 }
 
@@ -334,9 +334,9 @@ MeshTypeResolver< Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs...
 resolveTerminate( const Reader& reader,
                   ProblemSetterArgs&&... problemSetterArgs )
 {
-   std::cerr << "The mesh config type " << TNL::getType< MeshConfig >() << " is disabled in the build configuration for device " << Device::getDeviceType() << "." << std::endl;
+   std::cerr << "The mesh config type " << getType< MeshConfig >() << " is disabled in the build configuration for device " << getType< Device >() << "." << std::endl;
    return false;
-};
+}
 
 template< typename Reader,
           typename ConfigTag,
@@ -352,7 +352,7 @@ resolveTerminate( const Reader& reader,
 {
    using MeshType = Meshes::Mesh< MeshConfig, Device >;
    return ProblemSetter< MeshType >::run( std::forward<ProblemSetterArgs>(problemSetterArgs)... );
-};
+}
 
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h
index 6c60126457d1d4aa33426631b836407be6a412d7..d45016af193656cb6094f3782b304a92c224bf2a 100644
--- a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h
+++ b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h
@@ -149,7 +149,7 @@ loadMesh( const String& fileName,
       std::cerr << "I am not able to load the mesh from the file " << fileName << ". "
                    "Perhaps the mesh stored in the file is not supported by the mesh "
                    "passed to the loadMesh function? The mesh type is "
-                << mesh.getType() << std::endl;
+                << getType< decltype(mesh) >() << std::endl;
       return false;
    }
    return true;
diff --git a/src/TNL/Meshes/Writers/VTKWriter_impl.h b/src/TNL/Meshes/Writers/VTKWriter_impl.h
index e6c3eca442aed486ade87326480f0e7070b04b1c..83cf95ec4ca18f9c1b1375b9e89197884cdd8a5e 100644
--- a/src/TNL/Meshes/Writers/VTKWriter_impl.h
+++ b/src/TNL/Meshes/Writers/VTKWriter_impl.h
@@ -407,10 +407,10 @@ VTKWriter< Mesh >::writeAllEntities( const Mesh& mesh, std::ostream& str )
    const Index cellsListSize = __impl::getCellsListSize( mesh );
 
    str << std::endl << "CELLS " << allEntitiesCount << " " << cellsListSize << std::endl;
-   TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntitiesWriter >::exec( mesh, str );
+   Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntitiesWriter >::exec( mesh, str );
 
    str << std::endl << "CELL_TYPES " << allEntitiesCount << std::endl;
-   TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntityTypesWriter >::exec( mesh, str );
+   Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntityTypesWriter >::exec( mesh, str );
 }
 
 template< typename Mesh >
diff --git a/src/TNL/Object.h b/src/TNL/Object.h
index 24ced9a5c2ee9ac97f8d2a7a86e51e88085d13a4..ff7432635a13379c83bf49af758f1da7dd8f6b28 100644
--- a/src/TNL/Object.h
+++ b/src/TNL/Object.h
@@ -12,7 +12,6 @@
 
 #include <vector>
 
-#include <TNL/Devices/CudaCallable.h>
 #include <TNL/String.h>
 #include <TNL/File.h>
 
@@ -25,8 +24,7 @@ namespace TNL {
  * \brief Basic class for majority of TNL objects like matrices, meshes, grids, solvers, etc..
  *
  * Objects like numerical meshes, matrices large vectors etc. are inherited by
- * this class. This class introduces virtual method \ref getType which is
- * supposed to tell the object type in a C++ style.
+ * this class.
  *
  * Since the virtual destructor is not defined as \ref __cuda_callable__,
  * objects inherited from Object should not be created in CUDA kernels.
@@ -42,28 +40,6 @@ class Object
 {
    public:
 
-      /**
-       * \brief Static type getter.
-       *
-       * Returns the type in C++ style - for example the returned value
-       * may look as \c "Array< double, Devices::Cuda, int >".
-       *
-       * \par Example
-       * \include ObjectExample_getType.cpp
-       * \par Output
-       * \include ObjectExample_getType.out
-       */
-      static String getType();
-
-      /***
-       * \brief Virtual type getter.
-       *
-       * Returns the type in C++ style - for example the returned value
-       * may look as \c "Array< double, Devices::Cuda, int >".
-       * See example at \ref Object::getType.
-       */
-      virtual String getTypeVirtual() const;
-
       /**
        * \brief Static serialization type getter.
        *
@@ -71,7 +47,6 @@ class Object
        * is supposed to return the object type but with the device type replaced
        * by Devices::Host. For example \c Array< double, Devices::Cuda > is
        * saved as \c Array< double, Devices::Host >.
-       * See example at \ref Object::getType.
        */
       static String getSerializationType();
 
@@ -82,7 +57,6 @@ class Object
        * is supposed to return the object type but with the device type replaced
        * by Devices::Host. For example \c Array< double, Devices::Cuda > is
        * saved as \c Array< double, Devices::Host >.
-       * See example at \ref Object::getType.
        */
       virtual String getSerializationTypeVirtual() const;
 
@@ -128,9 +102,7 @@ class Object
        * Since it is not defined as \ref __cuda_callable__, objects inherited
        * from Object should not be created in CUDA kernels.
        */
-#ifndef HAVE_MIC
       virtual ~Object(){};
-#endif
 };
 
 /**
diff --git a/src/TNL/Object.hpp b/src/TNL/Object.hpp
index eeec8bf9840b2da7a72db00c49fd64ea660fcc6d..25c709212016025b6bdf18953f60380cb982edfc 100644
--- a/src/TNL/Object.hpp
+++ b/src/TNL/Object.hpp
@@ -20,16 +20,6 @@ namespace TNL {
 
 static constexpr char magic_number[] = "TNLMN";
 
-inline String Object::getType()
-{
-   return String( "Object" );
-}
-
-inline String Object::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 inline String Object::getSerializationType()
 {
    return String( "Object" );
diff --git a/src/TNL/Operators/Advection/LaxFridrichs.h b/src/TNL/Operators/Advection/LaxFridrichs.h
index d1fbd399e52737404063f6c016b19ac3743c8587..45a8abae727dc67bba817cadfc6e0deec7a2d853 100644
--- a/src/TNL/Operators/Advection/LaxFridrichs.h
+++ b/src/TNL/Operators/Advection/LaxFridrichs.h
@@ -61,8 +61,6 @@ class LaxFridrichs< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index,
          return true;
       }
 
-      static String getType();
-      
       void setViscosity(const Real& artificalViscosity)
       {
          this->artificialViscosity = artificalViscosity;
@@ -153,8 +151,6 @@ class LaxFridrichs< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index,
          return true;
       }
 
-      static String getType();
-      
       void setViscosity(const Real& artificalViscosity)
       {
          this->artificialViscosity = artificalViscosity;
@@ -251,8 +247,6 @@ class LaxFridrichs< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index,
          return true;
       }
 
-      static String getType();
-      
       void setViscosity(const Real& artificalViscosity)
       {
          this->artificialViscosity = artificalViscosity;
diff --git a/src/TNL/Operators/Advection/Upwind.h b/src/TNL/Operators/Advection/Upwind.h
index e41768e571082b9e7be7b547d915b4bf1e91340f..942ec29566235b0a7f5a7476f06e0e72e5356519 100644
--- a/src/TNL/Operators/Advection/Upwind.h
+++ b/src/TNL/Operators/Advection/Upwind.h
@@ -61,8 +61,6 @@ class Upwind< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, Veloc
          return true;
       }
 
-      static String getType();
-      
       void setViscosity(const Real& artificalViscosity)
       {
          this->artificialViscosity = artificalViscosity;
@@ -154,8 +152,6 @@ class Upwind< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, Veloc
          return true;
       }
 
-      static String getType();
-      
       void setViscosity(const Real& artificalViscosity)
       {
          this->artificialViscosity = artificalViscosity;
@@ -256,8 +252,6 @@ class Upwind< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, Veloc
          return true;
       }
 
-      static String getType();
-      
       void setViscosity(const Real& artificalViscosity)
       {
          this->artificialViscosity = artificalViscosity;
diff --git a/src/TNL/Operators/ExactFunctionInverseOperator.h b/src/TNL/Operators/ExactFunctionInverseOperator.h
index 7833d05445fd6c49f0971a5c57c0d7c4cdcea37c..6fe0b2f127c8e3c86f3d61a2e1605ef5ea1a1d90 100644
--- a/src/TNL/Operators/ExactFunctionInverseOperator.h
+++ b/src/TNL/Operators/ExactFunctionInverseOperator.h
@@ -25,12 +25,6 @@ class ExactFunctionInverseOperator
 {
    public:
  
-      static String getType()
-      {
-         return String( "ExactFunctionInverseOperator< " ) +
-                String( Dimension) + " >";
-      }
- 
       InnerOperator& getInnerOperator()
       {
          return this->innerOperator;
diff --git a/src/TNL/Operators/ExactIdentityOperator.h b/src/TNL/Operators/ExactIdentityOperator.h
index 7c39938df87fb6a1863d82ea13d54e2e4e482c1a..22d7bbdd86c81bb4ce41eb3aa0b0058c95082b1a 100644
--- a/src/TNL/Operators/ExactIdentityOperator.h
+++ b/src/TNL/Operators/ExactIdentityOperator.h
@@ -23,12 +23,6 @@ class ExactIdentityOperator
 {
    public:
  
-      static String getType()
-      {
-         return String( "ExactIdentityOperator< " ) +
-                String( Dimension) + " >";
-      }
- 
       template< typename Function >
       __cuda_callable__
       typename Function::RealType
diff --git a/src/TNL/Operators/FunctionInverseOperator.h b/src/TNL/Operators/FunctionInverseOperator.h
index 1265f3b70bdc7b5c5264289e1a39b71aa0801533..5bfb5c37f5eb0ef9a2f407c0cf6c81333a227075 100644
--- a/src/TNL/Operators/FunctionInverseOperator.h
+++ b/src/TNL/Operators/FunctionInverseOperator.h
@@ -36,11 +36,6 @@ class FunctionInverseOperator
       FunctionInverseOperator( const OperatorType& operator_ )
       : operator_( operator_ ) {};
  
-      static String getType()
-      {
-         return String( "FunctionInverseOperator< " ) + OperatorType::getType() + " >";
-      }
- 
       const OperatorType& getOperator() const { return this->operator_; }
  
       template< typename MeshFunction,
diff --git a/src/TNL/Operators/diffusion/ExactLinearDiffusion.h b/src/TNL/Operators/diffusion/ExactLinearDiffusion.h
index 790fa0777996839904b2c8270720dde20afc8946..f0a927d3465aa9168acd793eb43158264283489d 100644
--- a/src/TNL/Operators/diffusion/ExactLinearDiffusion.h
+++ b/src/TNL/Operators/diffusion/ExactLinearDiffusion.h
@@ -32,8 +32,6 @@ class ExactLinearDiffusion< 1 > : public Functions::Domain< 1, Functions::SpaceD
 
       static const int Dimension = 1;
  
-      static String getType();
- 
       template< typename Function >
       __cuda_callable__ inline
       typename Function::RealType operator()( const Function& function,
@@ -47,8 +45,6 @@ class ExactLinearDiffusion< 2 > : public Functions::Domain< 2, Functions::SpaceD
    public:
  
       static const int Dimension = 2;
- 
-      static String getType();
 
       template< typename Function >
       __cuda_callable__ inline
@@ -63,8 +59,6 @@ class ExactLinearDiffusion< 3 > : public Functions::Domain< 3 >
    public:
  
       static const int Dimension = 3;
- 
-      static String getType();
 
       template< typename Function >
       __cuda_callable__ inline
diff --git a/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h b/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h
index 0aabb1027e38d3390ca009813e7ac1bf54cb006d..60a27d9c07799c180124e87faad6c2d3fb4df4de 100644
--- a/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h
+++ b/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h
@@ -19,13 +19,6 @@
 namespace TNL {
 namespace Operators {
 
-String
-ExactLinearDiffusion< 1 >::
-getType()
-{
-   return "ExactLinearDiffusion< 1 >";
-}
-
 template< typename Function >
 __cuda_callable__ inline
 typename Function::RealType
@@ -37,13 +30,6 @@ operator()( const Function& function,
    return function.template getPartialDerivative< 2, 0, 0 >( v, time );
 }
 
-String
-ExactLinearDiffusion< 2 >::
-getType()
-{
-   return "ExactLinearDiffusion< 2 >";
-}
-
 template< typename Function >
 __cuda_callable__ inline
 typename Function::RealType
@@ -56,13 +42,6 @@ operator()( const Function& function,
           function.template getPartialDerivative< 0, 2, 0 >( v, time );
 }
 
-String
-ExactLinearDiffusion< 3 >::
-getType()
-{
-   return "ExactLinearDiffusion< 3 >";
-}
-
 template< typename Function >
 __cuda_callable__ inline
 typename Function::RealType
diff --git a/src/TNL/Operators/diffusion/ExactMeanCurvature.h b/src/TNL/Operators/diffusion/ExactMeanCurvature.h
index fbc2260efad49c99337d7af994bc3c61ef89c90e..a96d5a090acb0a1798409a665c1f98c001dbbc5b 100644
--- a/src/TNL/Operators/diffusion/ExactMeanCurvature.h
+++ b/src/TNL/Operators/diffusion/ExactMeanCurvature.h
@@ -34,13 +34,6 @@ class ExactMeanCurvature
       typedef ExactFunctionInverseOperator< Dimension, ExactGradientNormType > FunctionInverse;
       typedef ExactNonlinearDiffusion< Dimension, FunctionInverse > NonlinearDiffusion;
  
-      static String getType()
-      {
-         return String( "ExactMeanCurvature< " ) +
-                String( Dimension) + ", " +
-                InnerOperator::getType() + " >";
-      }
- 
       template< typename Real >
       void setRegularizationEpsilon( const Real& eps)
       {
diff --git a/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h b/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h
index 25381e2bb48bcdbdbde8f419c1cac856f621d7fd..826796751fd72328a7bd5213faa2c631842cf8bd 100644
--- a/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h
+++ b/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h
@@ -37,11 +37,6 @@ class ExactNonlinearDiffusion< 1, Nonlinearity, InnerOperator >
 {
    public:
 
-      static String getType()
-      {
-         return "ExactNonlinearDiffusion< 1, " + Nonlinearity::getType() + " >";
-      };
- 
       Nonlinearity& getNonlinearity()
       {
          return this->nonlinearity;
@@ -91,11 +86,6 @@ class ExactNonlinearDiffusion< 2, Nonlinearity, InnerOperator >
 {
    public:
  
-      static String getType()
-      {
-         return "ExactNonlinearDiffusion< " + Nonlinearity::getType() + ", 2 >";
-      };
- 
       Nonlinearity& getNonlinearity()
       {
          return this->nonlinearity;
@@ -150,11 +140,6 @@ class ExactNonlinearDiffusion< 3, Nonlinearity, InnerOperator >
 {
    public:
  
-      static String getType()
-      {
-         return "ExactNonlinearDiffusion< " + Nonlinearity::getType() + ", 3 >";
-      }
- 
       Nonlinearity& getNonlinearity()
       {
          return this->nonlinearity;
diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h
index efb17555547b53da1b850b789b6956674c1abbdf..e98d21c11d3975aa3306df35e9812e7a1e3420f3 100644
--- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h
+++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h
@@ -49,8 +49,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 1,MeshReal, Device, MeshIndex
    typedef Index IndexType;
    typedef OperatorQ OperatorQType;
 
-   static String getType();
-   
    template< typename MeshEntity,
              typename Vector >
    __cuda_callable__
@@ -102,8 +100,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 2, MeshReal, Device, MeshInde
    typedef OperatorQ OperatorQType;
    
 
-   static String getType();
-   
    template< typename MeshEntity,
              typename Vector >
    __cuda_callable__
@@ -155,8 +151,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 3, MeshReal, Device, MeshInde
    typedef Index IndexType;
    typedef OperatorQ OperatorQType;
 
-   static String getType();
-   
    template< typename MeshEntity, 
              typename Vector >
    __cuda_callable__
diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h
index 083160467875cc0e4f40b15c63b7cf59c222a68b..3d496bd52ea4f144122014422dabd46cca678801 100644
--- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h
+++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h
@@ -21,23 +21,6 @@
 namespace TNL {
 namespace Operators {   
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index,
-          typename OperatorQ >
-String
-FiniteVolumeNonlinearOperator< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >::
-getType()
-{
-   return String( "FiniteVolumeNonlinearOperator< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", " +
-	  OperatorQ::getType() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -98,23 +81,6 @@ setMatrixElements( const RealType& time,
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index,
-	  typename OperatorQ >
-String
-FiniteVolumeNonlinearOperator< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >::
-getType()
-{
-   return String( "FiniteVolumeNonlinearOperator< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", " +
-	  OperatorQ::getType() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -201,23 +167,6 @@ setMatrixElements( const RealType& time,
    matrixRow.setElement( 4, neighborEntities.template getEntityIndex<  0,  1 >(), eCoef );
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index,
- 	  typename OperatorQ >
-String
-FiniteVolumeNonlinearOperator< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >::
-getType()
-{
-   return String( "FiniteVolumeNonlinearOperator< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", " +
-	  OperatorQ::getType() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/TNL/Operators/diffusion/LinearDiffusion.h b/src/TNL/Operators/diffusion/LinearDiffusion.h
index e31113800f1c790ee630e7a99ae8d046fe039a5c..33e493d02c5e80bdd0d9ec6fdb40a519c4082f81 100644
--- a/src/TNL/Operators/diffusion/LinearDiffusion.h
+++ b/src/TNL/Operators/diffusion/LinearDiffusion.h
@@ -55,8 +55,6 @@ class LinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, Inde
       static const int Dimension = MeshType::getMeshDimension();
  
       static constexpr int getMeshDimension() { return Dimension; }
- 
-      static String getType();
 
       template< typename PreimageFunction,
                 typename MeshEntity >
@@ -107,8 +105,6 @@ class LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
  
       static constexpr int getMeshDimension() { return Dimension; }
 
-      static String getType();
-
       template< typename PreimageFunction, typename EntityType >
       __cuda_callable__
       inline Real operator()( const PreimageFunction& u,
@@ -157,8 +153,6 @@ class LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Ind
  
       static constexpr int getMeshDimension() { return Dimension; }
 
-      static String getType();
-
       template< typename PreimageFunction,
                 typename EntityType >
       __cuda_callable__
diff --git a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
index 83a20829ccc4f46a56eb80b1e474990db23856da..51bdf8a62372f82acf85b941f9de580b6d69c6a2 100644
--- a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
+++ b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
@@ -22,21 +22,6 @@
 namespace TNL {
 namespace Operators {   
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-LinearDiffusion< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "LinearDiffusion< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -109,21 +94,6 @@ setMatrixElements( const PreimageFunction& u,
    matrixRow.setElement( 2, neighborEntities.template getEntityIndex< 1 >(),       - lambdaX );
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "LinearDiffusion< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -203,22 +173,6 @@ setMatrixElements( const PreimageFunction& u,
    matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 0, 1 >(),   -lambdaY );
 }
 
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "LinearDiffusion< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h b/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h
index 0dbc269883acadf563cf6fa6f5c28c185e24436f..97f9ec2be1b51137d3a891daabe4ea4213fdfd4a 100644
--- a/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h
+++ b/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h
@@ -23,23 +23,6 @@
 namespace TNL {
 namespace Operators {
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index,
-          typename NonlinearDiffusionOperator >
-String
-NonlinearDiffusion< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >::
-getType()
-{
-   return String( "NonlinearDiffusion< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + "," +
-          NonlinearDiffusionOperator::getType() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -100,23 +83,6 @@ setMatrixElements( const RealType& time,
     nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix );
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index,
-          typename NonlinearDiffusionOperator >
-String
-NonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >::
-getType()
-{
-   return String( "NonlinearDiffusion< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + "," +
-          NonlinearDiffusionOperator::getType() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -177,23 +143,6 @@ setMatrixElements( const RealType& time,
     nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix );
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index,
-          typename NonlinearDiffusionOperator >
-String
-NonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >::
-getType()
-{
-   return String( "NonlinearDiffusion< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + "," +
-          NonlinearDiffusionOperator::getType() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
index 6e99d2f053bd40774356518ceac3e4428738eb0c..0c8767981189ea02f87fdf5da89a4c48203bee0a 100644
--- a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
+++ b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
@@ -55,14 +55,6 @@ class OneSidedMeanCurvature
         nonlinearity( nonlinearityOperator, nonlinearityBoundaryConditions, meshPointer ),
         nonlinearDiffusion( nonlinearity ){}
  
-      static String getType()
-      {
-         return String( "OneSidedMeanCurvature< " ) +
-            MeshType::getType() + ", " +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-      }
- 
       void setRegularizationEpsilon( const RealType& eps )
       {
          this->gradientNorm.setEps( eps );
diff --git a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h
index b74abe0b06de5898e86679dcd537d9656cbfcc9b..9691ab32b169e3ff82cd48f04827611859c3e1ef 100644
--- a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h
+++ b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h
@@ -52,15 +52,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >,
 
       OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity )
       : nonlinearity( nonlinearity ){}
- 
-      static String getType()
-      {
-         return String( "OneSidedNonlinearDiffusion< " ) +
-            MeshType::getType() + ", " +
-            Nonlinearity::getType() + "," +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-      }
 
       template< typename MeshFunction,
                 typename MeshEntity >
@@ -146,15 +137,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >
 
       OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity )
       : nonlinearity( nonlinearity ){}
- 
-      static String getType()
-      {
-         return String( "OneSidedNonlinearDiffusion< " ) +
-            MeshType::getType() + ", " +
-            Nonlinearity::getType() + "," +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-      }
 
       template< typename MeshFunction,
                 typename MeshEntity >
@@ -255,15 +237,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >
 
       OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity )
       : nonlinearity( nonlinearity ){}
- 
-      static String getType()
-      {
-         return String( "OneSidedNonlinearDiffusion< " ) +
-            MeshType::getType() + ", " +
-            Nonlinearity::getType() + "," +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-      }
 
       template< typename MeshFunction,
                 typename MeshEntity >
diff --git a/src/TNL/Operators/euler/fvm/LaxFridrichs.h b/src/TNL/Operators/euler/fvm/LaxFridrichs.h
index aaf44f5214def6e0cea92f0c49e1534f0915a1cf..10d1c7995d8564d0f3d9dbea087f8bf6200be620 100644
--- a/src/TNL/Operators/euler/fvm/LaxFridrichs.h
+++ b/src/TNL/Operators/euler/fvm/LaxFridrichs.h
@@ -42,8 +42,6 @@ class LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, Pressu
 
    LaxFridrichs();
 
-   static String getType();
-
    void getExplicitUpdate( const IndexType centralVolume,
                         RealType& rho_t,
                         RealType& rho_u1_t,
diff --git a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h
index aa9c8059a51b0925ce3474a1169e64ff6cd8e0b1..963ef76019d4a8e26980b829941025a6e10f30df 100644
--- a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h
+++ b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h
@@ -27,19 +27,6 @@ LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >,
 {
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename PressureGradient,
-          template< int, typename, typename, typename > class GridGeometry >
-String LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >,
-                           PressureGradient > :: getType()
-{
-   return String( "LaxFridrichs< " ) +
-          Meshes::Grid< 2, Real, Device, Index, GridGeometry > :: getType() + ", " +
-          PressureGradient :: getType() + " >";
-}
-
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Operators/fdm/BackwardFiniteDifference.h b/src/TNL/Operators/fdm/BackwardFiniteDifference.h
index cceaa807f49961d3efe1cfda844c13ff3daa435e..16282c73e75f9cfa2f7538bc8dd2cefdb1f3a096 100644
--- a/src/TNL/Operators/fdm/BackwardFiniteDifference.h
+++ b/src/TNL/Operators/fdm/BackwardFiniteDifference.h
@@ -50,17 +50,6 @@ class BackwardFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, M
  
       static constexpr int getMeshDimension() { return Dimension; }
  
-      static String getType()
-      {
-         return String( "BackwardFiniteDifference< " ) +
-            MeshType::getType() + ", " +
-            String( XDifference ) + ", " +
-            String( YDifference ) + ", " +
-            String( ZDifference ) + ", " +
-           TNL::getType< RealType >() + ", " +
-           TNL::getType< IndexType >() + " >";
-      }
- 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       inline Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/fdm/CentralFiniteDifference.h b/src/TNL/Operators/fdm/CentralFiniteDifference.h
index feecc62e77800c94b63417aad765b0c404d58a22..51a645be12d42812cdbf9622f5e5d7aa40b8f581 100644
--- a/src/TNL/Operators/fdm/CentralFiniteDifference.h
+++ b/src/TNL/Operators/fdm/CentralFiniteDifference.h
@@ -50,18 +50,6 @@ class CentralFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, Me
  
       //static constexpr int getMeshDimension() { return Dimension; }
  
-      static String getType()
-      {
-         return String( "CentralFiniteDifference< " ) +
-            MeshType::getType() + ", " +
-            String( XDifference ) + ", " +
-            String( YDifference ) + ", " +
-            String( ZDifference ) + ", " +
-           TNL::getType< RealType >() + ", " +
-           TNL::getType< IndexType >() + " >";
-      }
-
- 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       inline Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/fdm/ExactDifference.h b/src/TNL/Operators/fdm/ExactDifference.h
index 5efffc1b8436a529a4478561fde9326bc323787c..58c6936445ed2fe847672dc8aa5285e46cedac43 100644
--- a/src/TNL/Operators/fdm/ExactDifference.h
+++ b/src/TNL/Operators/fdm/ExactDifference.h
@@ -22,15 +22,6 @@ class ExactDifference
 {
    public:
  
-      static String getType()
-      {
-         return String( "ExactDifference< " ) +
-            String( Dimension ) + ", " +
-            String( XDerivative ) + ", " +
-            String( YDerivative ) + ", " +
-            String( ZDerivative ) + " >";
-      }
- 
       template< typename Function >
       __cuda_callable__
       typename Function::RealType operator()(
diff --git a/src/TNL/Operators/fdm/ForwardFiniteDifference.h b/src/TNL/Operators/fdm/ForwardFiniteDifference.h
index 53602afec21eb7bc1dc416c7647306297895643e..bbfe29bc10f9eb48ae011917788cdadf0444cc1a 100644
--- a/src/TNL/Operators/fdm/ForwardFiniteDifference.h
+++ b/src/TNL/Operators/fdm/ForwardFiniteDifference.h
@@ -51,18 +51,6 @@ class ForwardFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, Me
  
       static constexpr int getMeshDimension() { return Dimension; }
  
-      static String getType()
-      {
-         return String( "ForwardFiniteDifference< " ) +
-            MeshType::getType() + ", " +
-            String( XDifference ) + ", " +
-            String( YDifference ) + ", " +
-            String( ZDifference ) + ", " +
-           TNL::getType< RealType >() + ", " +
-           TNL::getType< IndexType >() + " >";
-      }
-
- 
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       inline Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/geometric/CoFVMGradientNorm.h b/src/TNL/Operators/geometric/CoFVMGradientNorm.h
index 2af779a11fdb2664bdfb636425dc8218e09199c7..37fabed7d4934be55e1327c0482de5dc3620a9bf 100644
--- a/src/TNL/Operators/geometric/CoFVMGradientNorm.h
+++ b/src/TNL/Operators/geometric/CoFVMGradientNorm.h
@@ -58,15 +58,6 @@ class CoFVMGradientNorm< Meshes::Grid< MeshDimension, MeshReal, Device, MeshInde
       : BaseType( outerOperator, innerOperator, mesh )
       {}
  
-      static String getType()
-      {
-         return String( "CoFVMGradientNorm< " ) +
-            MeshType::getType() + ", " +
-            String( MeshDimension ) + ", " +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-      }
- 
       void setEps( const RealType& eps )
       {
          this->getInnerOperator().setEps( eps );
@@ -100,14 +91,6 @@ class CoFVMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, 0, Real,
    CoFVMGradientNorm()
    : epsSquare( 0.0 ){}
 
-   static String getType()
-   {
-      return String( "CoFVMGradientNorm< " ) +
-         MeshType::getType() + ", 0, " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-   }
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -160,16 +143,6 @@ class CoFVMGradientNorm< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real
    CoFVMGradientNorm()
    : epsSquare( 0.0 ){}
 
-
-   static String getType()
-   {
-      return String( "CoFVMGradientNorm< " ) +
-         MeshType::getType() + ", 1, " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-
-   }
- 
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -278,14 +251,6 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real
    CoFVMGradientNorm()
    : epsSquare( 0.0 ){}
 
-   static String getType()
-   {
-      return String( "CoFVMGradientNorm< " ) +
-         MeshType::getType() + ", 2, " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-   }
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/geometric/ExactGradientNorm.h b/src/TNL/Operators/geometric/ExactGradientNorm.h
index cf7e3384820be1c2e8628fe655211037c6dde1dd..121e0181b7df9f309c6087428a4f2e2d8ff8790d 100644
--- a/src/TNL/Operators/geometric/ExactGradientNorm.h
+++ b/src/TNL/Operators/geometric/ExactGradientNorm.h
@@ -30,11 +30,6 @@ class ExactGradientNorm< 1, Real >
 {
    public:
 
-      static String getType()
-      {
-         return "ExactGradientNorm< 1 >";
-      }
- 
       ExactGradientNorm()
       : epsilonSquare( 0.0 ){};
 
@@ -98,11 +93,6 @@ class ExactGradientNorm< 2, Real >
 {
    public:
 
-      static String getType()
-      {
-         return "ExactGradientNorm< 2 >";
-      }
- 
       ExactGradientNorm()
       : epsilonSquare( 0.0 ){};
 
@@ -172,11 +162,6 @@ class ExactGradientNorm< 3, Real >
 {
    public:
 
-      static String getType()
-      {
-         return "ExactGradientNorm< 3 >";
-      }
- 
       ExactGradientNorm()
       : epsilonSquare( 0.0 ){};
 
diff --git a/src/TNL/Operators/geometric/FDMGradientNorm.h b/src/TNL/Operators/geometric/FDMGradientNorm.h
index a5eb4536317a0ff5258a681585f67557ff029d59..f42216a433e24d4be7dd25ff3beacb476612c26a 100644
--- a/src/TNL/Operators/geometric/FDMGradientNorm.h
+++ b/src/TNL/Operators/geometric/FDMGradientNorm.h
@@ -50,14 +50,6 @@ class FDMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Difference
    FDMGradientNorm()
    : epsSquare( 0.0 ){}
 
-   static String getType()
-   {
-      return String( "FDMGradientNorm< " ) +
-         MeshType::getType() + ", " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-   }
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -107,16 +99,6 @@ class FDMGradientNorm< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Difference
       FDMGradientNorm()
       : epsSquare( 0.0 ){}
 
-
-      static String getType()
-      {
-         return String( "FDMGradientNorm< " ) +
-            MeshType::getType() + ", " +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-
-      }
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
@@ -173,14 +155,6 @@ class FDMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Differenc
       FDMGradientNorm()
       : epsSquare( 0.0 ){}
 
-      static String getType()
-      {
-         return String( "FDMGradientNorm< " ) +
-            MeshType::getType() + ", " +
-           TNL::getType< Real >() + ", " +
-           TNL::getType< Index >() + " >";
-      }
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h
index 2d86167b1c1ee3a9466635c2605931248d80eb56..a1624b4089092a6402ff03e772c9de9ad24533e1 100644
--- a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h
+++ b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h
@@ -46,14 +46,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real,
    TwoSidedGradientNorm()
    : epsSquare( 0.0 ){}
 
-   static String getType()
-   {
-      return String( "TwoSidedGradientNorm< " ) +
-         MeshType::getType() + ", " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-   }
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -99,16 +91,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real,
    TwoSidedGradientNorm()
    : epsSquare( 0.0 ){}
 
-
-   static String getType()
-   {
-      return String( "TwoSidedGradientNorm< " ) +
-         MeshType::getType() + ", " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-
-   }
- 
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -162,14 +144,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real
    TwoSidedGradientNorm()
    : epsSquare( 0.0 ){}
 
-   static String getType()
-   {
-      return String( "TwoSidedGradientNorm< " ) +
-         MeshType::getType() + ", " +
-        TNL::getType< Real >() + ", " +
-        TNL::getType< Index >() + " >";
-   }
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h
index 7f145198f049c2062e0e53d33b215e0fe8c0a3b8..bfe41697f3d7a7bf21b0ac9dfcd7ac7ed0a57f13 100644
--- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h
+++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h
@@ -42,8 +42,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, R
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType();
-
    template< typename Vector >
    IndexType bind( Vector& u) 
    { return 0; }
@@ -96,8 +94,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, R
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType(); 
-
    template< typename Vector >
    IndexType bind( Vector& u)
    { return 0; }
@@ -149,8 +145,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, R
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType();
-
    template< typename Vector >
    IndexType bind( Vector& u)
    { return 0; }
@@ -202,8 +196,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, R
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType();
-
    template< typename Vector >
    Index bind( Vector& u);
 
@@ -255,8 +247,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, R
    typedef Device DeviceType;
    typedef Index IndexType;
    typedef SharedVector< RealType, DeviceType, IndexType > DofVectorType;
-   
-   static String getType(); 
 
    template< typename Vector >
    Index bind( Vector& u);
@@ -309,8 +299,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, R
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   
-   static String getType();
 
    template< typename Vector >
    Index bind( Vector& u);
diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h
index 0fae70006b3b9f69c8157cfcf2ad5538489b8f1d..184f1955d420b6b033128f484161aad3643a0750 100644
--- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h
+++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h
@@ -16,36 +16,6 @@
 namespace TNL {
 namespace Operators {   
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFiniteVolumeOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, 0 >::
-getType()
-{
-   return String( "tnlFiniteVolumeOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", 0 >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFiniteVolumeOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, 1 >::
-getType()
-{
-   return String( "tnlFiniteVolumeOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", 1 >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -165,36 +135,6 @@ operator()(
     return 0.0;
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFiniteVolumeOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, 0 >::
-getType()
-{
-   return String( "tnlFiniteVolumeOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", 0 >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFiniteVolumeOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, 1 >::
-getType()
-{
-   return String( "tnlFiniteVolumeOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", 1 >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -372,36 +312,6 @@ operator()( const MeshType& mesh,
    return q.getElement( entity.getIndex() );
 }
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, 0 >::
-getType()
-{
-   return String( "tnlFiniteVolumeOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", 0 >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, 1 >::
-getType()
-{
-   return String( "tnlFiniteVolumeOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + ", 1 >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h
index a96d22f5134029fb9686150713696da47ff05bfc..aff3917c31c05476ecb325627a0f002aa0a3bf44 100644
--- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h
+++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h
@@ -37,8 +37,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Re
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType();
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -74,8 +72,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Re
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType(); 
-      
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
@@ -112,8 +108,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, R
    typedef Device DeviceType;
    typedef Index IndexType;
 
-   static String getType();
-
    template< typename MeshFunction, typename MeshEntity >
    __cuda_callable__
    Real operator()( const MeshFunction& u,
diff --git a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h
index 21f5e44f08ec29fe365de11cfcbb5fb898f9af26..6291e0a5314a78765f13c11d9ae748281c5c6d0b 100644
--- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h
+++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h
@@ -16,21 +16,6 @@
 namespace TNL {
 namespace Operators {
 
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlOneSideDiffOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "tnlOneSideDiffOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -92,21 +77,6 @@ getValueStriped( const MeshFunction& u,
 /***
  * 2D
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlOneSideDiffOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "tnlOneSideDiffOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -177,21 +147,6 @@ getValueStriped( const MeshFunction& u,
 /***
  * 3D
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlOneSideDiffOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{
-   return String( "tnlOneSideDiffOperatorQ< " ) +
-          MeshType::getType() + ", " +
-         TNL::getType< Real >() + ", " +
-         TNL::getType< Index >() + " >";
-}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h
index 33a20e255ce950bfa9714504185c1f236ad4abed..f3a73c88a40c3dd8b4f99e6cb1fbc602009a5124 100644
--- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h
+++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h
@@ -29,8 +29,6 @@ class ExactOperatorCurvature< OperatorQ, 1 >
 
       enum { Dimension = 1 };
 
-      static String getType();
-
       template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType >
       __cuda_callable__
       static Real getValue( const Function& function,
@@ -46,8 +44,6 @@ class ExactOperatorCurvature< ExactOperatorQ, 2 >
 
       enum { Dimension = 2 };
 
-      static String getType();
-         
       template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType >
       __cuda_callable__
       static Real getValue( const Function& function,
@@ -62,8 +58,6 @@ class ExactOperatorCurvature< ExactOperatorQ, 3 >
 
       enum { Dimension = 3 };
 
-      static String getType();
-   
       template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType >
       __cuda_callable__
       static Real getValue( const Function& function,
diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h
index dfc03a72799708b3e70741f8cbd4a354b8b076d6..8408cfe17c0e74dce6afa0550019304b018eaa89 100644
--- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h
+++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h
@@ -15,14 +15,6 @@
 namespace TNL {
 namespace Operators {   
 
-template< typename ExactOperatorQ >
-String
-ExactOperatorCurvature< ExactOperatorQ, 1 >::
-getType()
-{
-   return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",1 >";
-}
-
 template< typename OperatorQ >
 template< int XDiffOrder, int YDiffOrder, int ZDiffOrder, typename Function, typename Point, typename Real >
 __cuda_callable__
@@ -41,14 +33,6 @@ getValue( const Function& function,
    return 0;
 }
 
-template< typename ExactOperatorQ >
-String
-ExactOperatorCurvature< ExactOperatorQ, 2 >::
-getType()
-{
-   return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",2 >";
-}
-
 template< int XDiffOrder, int YDiffOrder, int ZDiffOrder, typename Function, typename Point, typename Real >
 __cuda_callable__
 Real
@@ -68,13 +52,5 @@ getValue( const Function& function,
    return 0;
 }
 
-template< typename ExactOperatorQ >
-String
-ExactOperatorCurvature< ExactOperatorQ, 3 >::
-getType()
-{
-   return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",3 >";
-}
-
 } // namespace Operators
 } // namespace TNL
diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h
index b0c0a934fa0dee01ebe4bb2d93abe3e6d0d36b68..5276c3ed465938e7e7fcdfde2885dc8986cac3b5 100644
--- a/src/TNL/Pointers/DevicePointer.h
+++ b/src/TNL/Pointers/DevicePointer.h
@@ -15,8 +15,10 @@
 #include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
 #include <TNL/Pointers/SmartPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+#include <TNL/TypeInfo.h>
+#include <TNL/Cuda/MemoryHelpers.h>
 
 #include <cstring>  // std::memcpy, std::memcmp
 
@@ -405,7 +407,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
       ~DevicePointer()
       {
          this->free();
-         Devices::Cuda::removeSmartPointer( this );
+         getSmartPointersRegister< DeviceType >().remove( this );
       }
 
    protected:
@@ -422,10 +424,10 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
          this->pointer = &obj;
          this->pd = new PointerData();
          // pass to device
-         this->cuda_pointer = Devices::Cuda::passToDevice( *this->pointer );
+         this->cuda_pointer = Cuda::passToDevice( *this->pointer );
          // set last-sync state
          this->set_last_sync_state();
-         Devices::Cuda::insertSmartPointer( this );
+         getSmartPointersRegister< DeviceType >().insert( this );
          return true;
       }
 
@@ -456,7 +458,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
                delete this->pd;
                this->pd = nullptr;
                if( this->cuda_pointer )
-                  Devices::Cuda::freeFromDevice( this->cuda_pointer );
+                  Cuda::freeFromDevice( this->cuda_pointer );
             }
          }
       }
@@ -470,288 +472,9 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
       Object* cuda_pointer;
 };
 
-/****
- * Specialization for MIC
- */
-
-#ifdef HAVE_MIC
-template< typename Object >
-class DevicePointer< Object, Devices::MIC > : public SmartPointer
-{
-   private:
-      // Convenient template alias for controlling the selection of copy- and
-      // move-constructors and assignment operators using SFINAE.
-      // The type Object_ is "enabled" iff Object_ and Object are not the same,
-      // but after removing const and volatile qualifiers they are the same.
-      template< typename Object_ >
-      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
-                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
-
-      // friend class will be needed for templated assignment operators
-      template< typename Object_, typename Device_ >
-      friend class DevicePointer;
-
-   public:
-
-      typedef Object ObjectType;
-      typedef Devices::MIC DeviceType;
-
-      explicit  DevicePointer( ObjectType& obj )
-      : pointer( nullptr ),
-        pd( nullptr ),
-        mic_pointer( nullptr )
-      {
-         this->allocate( obj );
-      }
-
-      // this is needed only to avoid the default compiler-generated constructor
-      DevicePointer( const DevicePointer& pointer )
-      : pointer( pointer.pointer ),
-        pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         this->pd->counter += 1;
-      }
-
-      // conditional constructor for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      DevicePointer( const DevicePointer< Object_, DeviceType >& pointer )
-      : pointer( pointer.pointer ),
-        pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         this->pd->counter += 1;
-      }
-
-      // this is needed only to avoid the default compiler-generated constructor
-      DevicePointer( DevicePointer&& pointer )
-      : pointer( pointer.pointer ),
-        pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         pointer.pointer = nullptr;
-         pointer.pd = nullptr;
-         pointer.mic_pointer = nullptr;
-      }
-
-      // conditional constructor for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      DevicePointer( DevicePointer< Object_, DeviceType >&& pointer )
-      : pointer( pointer.pointer ),
-        pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         pointer.pointer = nullptr;
-         pointer.pd = nullptr;
-         pointer.mic_pointer = nullptr;
-      }
-
-      const Object* operator->() const
-      {
-         return this->pointer;
-      }
-
-      Object* operator->()
-      {
-         this->pd->maybe_modified = true;
-         return this->pointer;
-      }
-
-      const Object& operator *() const
-      {
-         return *( this->pointer );
-      }
-
-      Object& operator *()
-      {
-         this->pd->maybe_modified = true;
-         return *( this->pointer );
-      }
-
-      operator bool()
-      {
-         return this->pd;
-      }
-
-      template< typename Device = Devices::Host >
-      __cuda_callable__
-      const Object& getData() const
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT( this->pointer, );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->mic_pointer, );
-         if( std::is_same< Device, Devices::Host >::value )
-            return *( this->pointer );
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-      }
-
-      template< typename Device = Devices::Host >
-      __cuda_callable__
-      Object& modifyData()
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT( this->pointer, );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->mic_pointer, );
-         if( std::is_same< Device, Devices::Host >::value )
-         {
-            this->pd->maybe_modified = true;
-            return *( this->pointer );
-         }
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-      }
-
-      // this is needed only to avoid the default compiler-generated operator
-      const DevicePointer& operator=( const DevicePointer& ptr )
-      {
-         this->free();
-         this->pointer = ptr.pointer;
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         this->pd->counter += 1;
-         return *this;
-      }
-
-      // conditional operator for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      const DevicePointer& operator=( const DevicePointer< Object_, DeviceType >& ptr )
-      {
-         this->free();
-         this->pointer = ptr.pointer;
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         this->pd->counter += 1;
-         return *this;
-      }
-
-      // this is needed only to avoid the default compiler-generated operator
-      const DevicePointer& operator=( DevicePointer&& ptr )
-      {
-         this->free();
-         this->pointer = ptr.pointer;
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pointer = nullptr;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-         return *this;
-      }
-
-      // conditional operator for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      const DevicePointer& operator=( DevicePointer< Object_, DeviceType >&& ptr )
-      {
-         this->free();
-         this->pointer = ptr.pointer;
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pointer = nullptr;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-         return *this;
-      }
-
-      bool synchronize()
-      {
-         if( ! this->pd )
-            return true;
-         if( this->modified() )
-         {
-            TNL_ASSERT( this->pointer, );
-            TNL_ASSERT( this->mic_pointer, );
-            Devices::MIC::CopyToMIC((void*) this->mic_pointer, (void*) this->pointer, sizeof( ObjectType ));
-            this->set_last_sync_state();
-            return true;
-         }
-         return true;
-
-      }
-
-      ~DevicePointer()
-      {
-         this->free();
-         Devices::MIC::removeSmartPointer( this );
-      }
-
-   protected:
-
-      struct PointerData
-      {
-         char data_image[ sizeof(Object) ];
-         int counter = 1;
-         bool maybe_modified = false;
-      };
-
-      bool allocate( ObjectType& obj )
-      {
-         this->pointer = &obj;
-         this->pd = new PointerData();
-         if( ! this->pd )
-            return false;
-         // pass to device
-         this->mic_pointer = Allocators:::MIC< ObjectType >().allocate(1);
-         if( ! this->mic_pointer )
-            return false;
-         Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*)this->pointer,sizeof(ObjectType));
-
-         // set last-sync state
-         this->set_last_sync_state();
-         Devices::MIC::insertSmartPointer( this );
-         return true;
-      }
-
-      void set_last_sync_state()
-      {
-         TNL_ASSERT( this->pointer, );
-         TNL_ASSERT( this->pd, );
-         std::memcpy( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) );
-         this->pd->maybe_modified = false;
-      }
-
-      bool modified()
-      {
-         TNL_ASSERT( this->pointer, );
-         TNL_ASSERT( this->pd, );
-         // optimization: skip bitwise comparison if we're sure that the data is the same
-         if( ! this->pd->maybe_modified )
-            return false;
-         return std::memcmp( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) ) != 0;
-      }
-
-      void free()
-      {
-         if( this->pd )
-         {
-            if( ! --this->pd->counter )
-            {
-               delete this->pd;
-               this->pd = nullptr;
-               if( this->mic_pointer )
-                  Allocators:::MIC< ObjectType >().deallocate(this->mic_pointer, 1);
-            }
-         }
-      }
-
-      Object* pointer;
-
-      PointerData* pd;
-
-      // mic_pointer can't be part of PointerData structure, since we would be
-      // unable to dereference this-pd on the device
-      Object* mic_pointer;
-};
-#endif
-
 } // namespace Pointers
 
-#if (!defined(NDEBUG)) && (!defined(HAVE_MIC))
+#ifndef NDEBUG
 namespace Assert {
 
 template< typename Object, typename Device >
@@ -761,8 +484,8 @@ struct Formatter< Pointers::DevicePointer< Object, Device > >
    printToString( const Pointers::DevicePointer< Object, Device >& value )
    {
       ::std::stringstream ss;
-      ss << "(DevicePointer< " << Object::getType() << ", " << Device::getDeviceType()
-         << " > object at " << &value << ")";
+      ss << "(" + getType< Pointers::DevicePointer< Object, Device > >()
+         << " object at " << &value << ")";
       return ss.str();
    }
 };
diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h
index e6908e47953b330b612ea9ec8a2421d8c11bc8a9..93f63f807c5038795c53cc0c5182571ab2d8a9c4 100644
--- a/src/TNL/Pointers/SharedPointer.h
+++ b/src/TNL/Pointers/SharedPointer.h
@@ -15,28 +15,10 @@
 #include <cstring>
 #include <type_traits>
 #include <TNL/Assert.h>
+#include <TNL/TypeInfo.h>
 
 //#define TNL_DEBUG_SHARED_POINTERS
 
-#ifdef TNL_DEBUG_SHARED_POINTERS
-   #include <typeinfo>
-   #include <cxxabi.h>
-   #include <iostream>
-   #include <string>
-   #include <memory>
-   #include <cstdlib>
-
-   inline
-   std::string demangle(const char* mangled)
-   {
-      int status;
-      std::unique_ptr<char[], void (*)(void*)> result(
-         abi::__cxa_demangle(mangled, 0, 0, &status), std::free);
-      return result.get() ? std::string(result.get()) : "error occurred";
-   }
-#endif
-
-
 namespace TNL {
 namespace Pointers {
 
@@ -49,7 +31,7 @@ class SharedPointer
 
 } // namespace Pointers
 
-#if (!defined(NDEBUG)) && (!defined(HAVE_MIC))
+#ifndef NDEBUG
 namespace Assert {
 
 template< typename Object, typename Device >
@@ -59,7 +41,7 @@ struct Formatter< Pointers::SharedPointer< Object, Device > >
    printToString( const Pointers::SharedPointer< Object, Device >& value )
    {
       ::std::stringstream ss;
-      ss << "(SharedPointer< " << Object::getType() << ", " << Device::getDeviceType()
-         << " > object at " << &value << ")";
+      ss << "(" + getType< Pointers::SharedPointer< Object, Device > >()
+         << " object at " << &value << ")";
       return ss.str();
    }
@@ -72,4 +54,3 @@ struct Formatter< Pointers::SharedPointer< Object, Device > >
 
 #include <TNL/Pointers/SharedPointerHost.h>
 #include <TNL/Pointers/SharedPointerCuda.h>
-#include <TNL/Pointers/SharedPointerMic.h>
diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h
index 2cf1b297f8f1dbdbbb95d7bf3630df8e48242988..54dd4ee3c71c7eb461de7d8c906fd42dc96af1c6 100644
--- a/src/TNL/Pointers/SharedPointerCuda.h
+++ b/src/TNL/Pointers/SharedPointerCuda.h
@@ -16,32 +16,13 @@
 
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Pointers/SmartPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+#include <TNL/Cuda/MemoryHelpers.h>
 
 #include <cstring>   // std::memcpy, std::memcmp
 #include <cstddef>   // std::nullptr_t
 #include <algorithm> // swap
 
-//#define TNL_DEBUG_SHARED_POINTERS
-
-#ifdef TNL_DEBUG_SHARED_POINTERS
-   #include <typeinfo>
-   #include <cxxabi.h>
-   #include <iostream>
-   #include <string>
-   #include <memory>
-   #include <cstdlib>
-
-   inline
-   std::string demangle(const char* mangled)
-   {
-      int status;
-      std::unique_ptr<char[], void (*)(void*)> result(
-         abi::__cxa_demangle(mangled, 0, 0, &status), std::free);
-      return result.get() ? std::string(result.get()) : "error occurred";
-   }
-#endif
-
-
 namespace TNL {
 namespace Pointers {
 
@@ -78,7 +59,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       : pd( nullptr )
       {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl;
 #endif
          this->allocate( args... );
       }
@@ -119,7 +100,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       bool recreate( Args... args )
       {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl;
 #endif
          if( ! this->counter )
             return this->allocate( args... );
@@ -377,7 +358,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       bool recreate( Args... args )
       {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl;
 #endif
          if( ! this->pd )
             return this->allocate( args... );
@@ -478,7 +459,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          if( this->pd != nullptr )
             this->pd->counter += 1;
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl;
 #endif
          return *this;
       }
@@ -494,7 +475,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          if( this->pd != nullptr )
             this->pd->counter += 1;
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl;
 #endif
          return *this;
       }
@@ -508,7 +489,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          ptr.pd = nullptr;
          ptr.cuda_pointer = nullptr;
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl;
 #endif
          return *this;
       }
@@ -524,7 +505,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          ptr.pd = nullptr;
          ptr.cuda_pointer = nullptr;
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl;
 #endif
          return *this;
       }
@@ -537,7 +518,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          if( this->modified() )
          {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-            std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl;
+            std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl;
             std::cerr << "   ( " << sizeof( Object ) << " bytes, CUDA adress " << this->cuda_pointer << " )" << std::endl;
 #endif
             TNL_ASSERT( this->cuda_pointer, );
@@ -566,7 +547,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       ~SharedPointer()
       {
          this->free();
-         Devices::Cuda::removeSmartPointer( this );
+         getSmartPointersRegister< DeviceType >().remove( this );
       }
 
    protected:
@@ -591,13 +572,13 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       {
          this->pd = new PointerData( args... );
          // pass to device
-         this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data );
+         this->cuda_pointer = Cuda::passToDevice( this->pd->data );
          // set last-sync state
          this->set_last_sync_state();
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl;
+         std::cerr << "Created shared pointer to " << getType< ObjectType >() << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl;
 #endif
-         Devices::Cuda::insertSmartPointer( this );
+         getSmartPointersRegister< DeviceType >().insert( this );
          return true;
       }
 
@@ -622,14 +603,14 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          if( this->pd )
          {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-            std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+            std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << getType< ObjectType >() << std::endl;
 #endif
             if( ! --this->pd->counter )
             {
                delete this->pd;
                this->pd = nullptr;
                if( this->cuda_pointer )
-                  Devices::Cuda::freeFromDevice( this->cuda_pointer );
+                  Cuda::freeFromDevice( this->cuda_pointer );
 #ifdef TNL_DEBUG_SHARED_POINTERS
                std::cerr << "...deleted data." << std::endl;
 #endif
diff --git a/src/TNL/Pointers/SharedPointerHost.h b/src/TNL/Pointers/SharedPointerHost.h
index 48d83c93890a12ef2b4d0dfb7a466490578f24b9..39a6d4da4a2b8ab8b964110173d1716fade1ac71 100644
--- a/src/TNL/Pointers/SharedPointerHost.h
+++ b/src/TNL/Pointers/SharedPointerHost.h
@@ -15,7 +15,7 @@
 #include "SharedPointer.h"
 
 #include <TNL/Devices/Host.h>
-#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Pointers/SmartPointer.h>
 
 #include <cstddef>   // std::nullptr_t
@@ -54,7 +54,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       : pd( nullptr )
       {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl;
 #endif
          this->allocate( args... );
       }
@@ -95,7 +95,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       bool recreate( Args... args )
       {
 #ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+         std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl;
 #endif
          if( ! this->counter )
             return this->allocate( args... );
diff --git a/src/TNL/Pointers/SharedPointerMic.h b/src/TNL/Pointers/SharedPointerMic.h
deleted file mode 100644
index 0c2958b4ad7c6552f58363c98dca5104908f04cc..0000000000000000000000000000000000000000
--- a/src/TNL/Pointers/SharedPointerMic.h
+++ /dev/null
@@ -1,373 +0,0 @@
-/***************************************************************************
-                          SharedPointerMic.h  -  description
-                             -------------------
-    begin                : Aug 22, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Tomas Oberhuber, Jakub Klinkovsky
-
-#pragma once
-
-#include "SharedPointer.h"
-
-#include <TNL/Allocators/Default.h>
-#include <TNL/Devices/MIC.h>
-#include <TNL/Pointers/SmartPointer.h>
-
-#include <cstring>   // std::memcpy, std::memcmp
-#include <cstddef>   // std::nullptr_t
-#include <algorithm> // swap
-
-namespace TNL {
-namespace Pointers {
-
-#ifdef HAVE_MIC
-template< typename Object>
-class SharedPointer< Object, Devices::MIC > : public SmartPointer
-{
-   private:
-      // Convenient template alias for controlling the selection of copy- and
-      // move-constructors and assignment operators using SFINAE.
-      // The type Object_ is "enabled" iff Object_ and Object are not the same,
-      // but after removing const and volatile qualifiers they are the same.
-      template< typename Object_ >
-      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
-                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
-
-      // friend class will be needed for templated assignment operators
-      template< typename Object_, typename Device_>
-      friend class SharedPointer;
-
-   public:
-
-      using ObjectType = Object;
-      using DeviceType = Devices::MIC; 
-
-      SharedPointer( std::nullptr_t )
-      : pd( nullptr ),
-        mic_pointer( nullptr )
-      {}
-
-      template< typename... Args >
-      explicit  SharedPointer( Args... args )
-      : pd( nullptr ),
-        mic_pointer( nullptr )
-      {
-            this->allocate( args... );
-      }
-
-      // this is needed only to avoid the default compiler-generated constructor
-      SharedPointer( const SharedPointer& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         this->pd->counter += 1;
-      }
-
-      // conditional constructor for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      SharedPointer( const SharedPointer< Object_, DeviceType >& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         this->pd->counter += 1;
-      }
-
-      // this is needed only to avoid the default compiler-generated constructor
-      SharedPointer( SharedPointer&& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         pointer.pd = nullptr;
-         pointer.mic_pointer = nullptr;
-      }
-
-      // conditional constructor for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      SharedPointer( SharedPointer< Object_, DeviceType >&& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         pointer.pd = nullptr;
-         pointer.mic_pointer = nullptr;
-      }
-
-      template< typename... Args >
-      bool recreate( Args... args )
-      {
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         if( ! this->pd )
-            return this->allocate( args... );
-
-         if( this->pd->counter == 1 )
-         {
-            /****
-             * The object is not shared -> recreate it in-place, without reallocation
-             */
-            this->pd->data.~Object();
-            new ( &this->pd->data ) Object( args... );
-            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
-            this->set_last_sync_state();
-            return true;
-         }
-
-         // free will just decrement the counter
-         this->free();
-
-         return this->allocate( args... );
-      }
-
-      const Object* operator->() const
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         return &this->pd->data;
-      }
-
-      Object* operator->()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         this->pd->maybe_modified = true;
-         return &this->pd->data;
-      }
-
-      const Object& operator *() const
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         return this->pd->data;
-      }
-
-      Object& operator *()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         this->pd->maybe_modified = true;
-         return this->pd->data;
-      }
-
-      operator bool()
-      {
-         return this->pd;
-      }
-
-      template< typename Device = Devices::Host >
-      __cuda_callable__
-      const Object& getData() const
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
-         if( std::is_same< Device, Devices::Host >::value )
-            return this->pd->data;
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-
-      }
-
-      template< typename Device = Devices::Host >
-      __cuda_callable__
-      Object& modifyData()
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
-         if( std::is_same< Device, Devices::Host >::value )
-         {
-            this->pd->maybe_modified = true;
-            return this->pd->data;
-         }
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-
-      }
-
-      // this is needed only to avoid the default compiler-generated operator
-      const SharedPointer& operator=( const SharedPointer& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         if( this->pd != nullptr )
-            this->pd->counter += 1;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      // conditional operator for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         if( this->pd != nullptr )
-            this->pd->counter += 1;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      // this is needed only to avoid the default compiler-generated operator
-      const SharedPointer& operator=( SharedPointer&& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      // conditional operator for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      bool synchronize()
-      {
-         if( ! this->pd )
-            return true;
-
-         if( this->modified() )
-         {
-#ifdef TNL_DEBUG_SHARED_POINTERS
-            std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl;
-            std::cerr << "   ( " << sizeof( Object ) << " bytes, MIC adress " << this->mic_pointer << " )" << std::endl;
-#endif
-            TNL_ASSERT( this->mic_pointer, );
-
-            Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
-            this->set_last_sync_state();
-            return true;
-         }
-         return false; //??
-      }
-
-      void clear()
-      {
-         this->free();
-      }
-
-      void swap( SharedPointer& ptr2 )
-      {
-         std::swap( this->pd, ptr2.pd );
-         std::swap( this->mic_pointer, ptr2.mic_pointer );
-      }
-
-      ~SharedPointer()
-      {
-         this->free();
-         Devices::MIC::removeSmartPointer( this );
-      }
-
-   protected:
-
-      struct PointerData
-      {
-         Object data;
-         uint8_t data_image[ sizeof(Object) ];
-         int counter;
-         bool maybe_modified;
-
-         template< typename... Args >
-         explicit PointerData( Args... args )
-         : data( args... ),
-           counter( 1 ),
-           maybe_modified( false )
-         {}
-      };
-
-      template< typename... Args >
-      bool allocate( Args... args )
-      {
-         this->pd = new PointerData( args... );
-         if( ! this->pd )
-            return false;
-
-         mic_pointer = Allocators::MIC< Object >().allocate(1);
-         Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
-
-         if( ! this->mic_pointer )
-            return false;
-         // set last-sync state
-         this->set_last_sync_state();
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (mic_pointer = " << this->mic_pointer << ")" << std::endl;
-#endif
-         Devices::MIC::insertSmartPointer( this );
-         return true;
-      }
-
-      void set_last_sync_state()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) );
-         this->pd->maybe_modified = false;
-      }
-
-      bool modified()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         // optimization: skip bitwise comparison if we're sure that the data is the same
-         if( ! this->pd->maybe_modified )
-            return false;
-         return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0;
-      }
-
-      void free()
-      {
-         if( this->pd )
-         {
-#ifdef TNL_DEBUG_SHARED_POINTERS
-            std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", mic_pointer = " << this->mic_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-            if( ! --this->pd->counter )
-            {
-               delete this->pd;
-               this->pd = nullptr;
-               if( this->mic_pointer )
-               {
-                  Allocators:::MIC< ObjectType >().deallocate(mic_pointer, 1);
-                  mic_pointer=nullptr;
-               }
-#ifdef TNL_DEBUG_SHARED_POINTERS
-               std::cerr << "...deleted data." << std::endl;
-#endif
-            }
-         }
-      }
-
-      PointerData* pd;
-
-      // cuda_pointer can't be part of PointerData structure, since we would be
-      // unable to dereference this-pd on the device -- Nevím zda to platí pro MIC, asi jo
-      Object* mic_pointer;
-};
-#endif
-
-} // namespace Pointers
-} // namespace TNL
diff --git a/src/TNL/Pointers/SmartPointersRegister.h b/src/TNL/Pointers/SmartPointersRegister.h
index ad716b9c036ee011dca583b2557247ce0d110453..7f261a28e203a1f0ebfb47729742405e26b8f20d 100644
--- a/src/TNL/Pointers/SmartPointersRegister.h
+++ b/src/TNL/Pointers/SmartPointersRegister.h
@@ -2,7 +2,7 @@
                           SmartPointersRegister.h  -  description
                              -------------------
     begin                : Apr 29, 2016
-    copyright            : (C) 2016 by Tomas Oberhuber
+    copyright            : (C) 2016 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -12,24 +12,45 @@
 
 #include <unordered_set>
 #include <unordered_map>
+
 #include <TNL/Pointers/SmartPointer.h>
-#include <TNL/Assert.h>
+#include <TNL/Timer.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/Devices/Sequential.h>
+#include <TNL/Devices/Host.h>
 
 namespace TNL {
 namespace Pointers {
 
+// Since TNL currently supports only execution on host (which does not need
+// to register and synchronize smart pointers) and CUDA GPU's, the smart
+// pointers register is implemented only for CUDA. If more execution types
+// which need to register smart pointers are implemented in the future, this
+// should become a class template specialization.
 class SmartPointersRegister
 {
 
    public:
 
-      void insert( SmartPointer* pointer, int deviceId )
+      /**
+       * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be
+       * called to get the device ID.
+       */
+      void insert( SmartPointer* pointer, int deviceId = -1 )
       {
+         if( deviceId < 0 )
+            deviceId = Cuda::DeviceInfo::getActiveDevice();
          pointersOnDevices[ deviceId ].insert( pointer );
       }
 
-      void remove( SmartPointer* pointer, int deviceId )
+      /**
+       * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be
+       * called to get the device ID.
+       */
+      void remove( SmartPointer* pointer, int deviceId = -1 )
       {
+         if( deviceId < 0 )
+            deviceId = Cuda::DeviceInfo::getActiveDevice();
          try {
             pointersOnDevices.at( deviceId ).erase( pointer );
          }
@@ -41,8 +62,14 @@ class SmartPointersRegister
          }
       }
 
-      bool synchronizeDevice( int deviceId )
+      /**
+       * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be
+       * called to get the device ID.
+       */
+      bool synchronizeDevice( int deviceId = -1 )
       {
+         if( deviceId < 0 )
+            deviceId = Cuda::DeviceInfo::getActiveDevice();
          try {
             const auto & set = pointersOnDevices.at( deviceId );
             for( auto&& it : set )
@@ -61,5 +88,38 @@ class SmartPointersRegister
       std::unordered_map< int, SetType > pointersOnDevices;
 };
 
+
+// TODO: Device -> Allocator (in all smart pointers)
+template< typename Device >
+SmartPointersRegister& getSmartPointersRegister()
+{
+   static SmartPointersRegister reg;
+   return reg;
+}
+
+template< typename Device >
+Timer& getSmartPointersSynchronizationTimer()
+{
+   static Timer timer;
+   return timer;
+}
+
+/**
+ * Negative deviceId means that the ID of the currently active device will be
+ * determined automatically.
+ */
+template< typename Device >
+bool synchronizeSmartPointersOnDevice( int deviceId = -1 )
+{
+   // TODO: better way to skip synchronization of host-only smart pointers
+   if( std::is_same< Device, Devices::Sequential >::value || std::is_same< Device, Devices::Host >::value )
+      return true;
+
+   getSmartPointersSynchronizationTimer< Device >().start();
+   bool b = getSmartPointersRegister< Device >().synchronizeDevice( deviceId );
+   getSmartPointersSynchronizationTimer< Device >().stop();
+   return b;
+}
+
 } // namespace Pointers
 } // namespace TNL
diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h
index cfb7b543fc3e94858ad5c34d4bf8e8c0faf85462..071de4d51132fa6b71e0e6b86ab16acd3c8269c8 100644
--- a/src/TNL/Pointers/UniquePointer.h
+++ b/src/TNL/Pointers/UniquePointer.h
@@ -15,8 +15,9 @@
 #include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
 #include <TNL/Pointers/SmartPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+#include <TNL/Cuda/MemoryHelpers.h>
 
 #include <cstring>  // std::memcpy, std::memcmp
 #include <cstddef>  // std::nullptr_t
@@ -250,7 +251,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
       ~UniquePointer()
       {
          this->free();
-         Devices::Cuda::removeSmartPointer( this );
+         getSmartPointersRegister< DeviceType >().remove( this );
       }
 
    protected:
@@ -273,10 +274,10 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
       {
          this->pd = new PointerData( args... );
          // pass to device
-         this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data );
+         this->cuda_pointer = Cuda::passToDevice( this->pd->data );
          // set last-sync state
          this->set_last_sync_state();
-         Devices::Cuda::insertSmartPointer( this );
+         getSmartPointersRegister< DeviceType >().insert( this );
          return true;
       }
 
@@ -301,7 +302,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          if( this->pd )
             delete this->pd;
          if( this->cuda_pointer )
-            Devices::Cuda::freeFromDevice( this->cuda_pointer );
+            Cuda::freeFromDevice( this->cuda_pointer );
       }
 
       PointerData* pd;
@@ -311,187 +312,9 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
       Object* cuda_pointer;
 };
 
-#ifdef HAVE_MIC
-template< typename Object >
-class UniquePointer< Object, Devices::MIC > : public SmartPointer
-{
-   public:
-
-      typedef Object ObjectType;
-      typedef Devices::MIC DeviceType;
-
-      UniquePointer( std::nullptr_t )
-      : pd( nullptr ),
-        mic_pointer( nullptr )
-      {}
-
-      template< typename... Args >
-      explicit  UniquePointer( const Args... args )
-      : pd( nullptr ),
-        mic_pointer( nullptr )
-      {
-         this->allocate( args... );
-      }
-
-      const Object* operator->() const
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         return &this->pd->data;
-      }
-
-      Object* operator->()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         this->pd->maybe_modified = true;
-         return &this->pd->data;
-      }
-
-      const Object& operator *() const
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         return this->pd->data;
-      }
-
-      Object& operator *()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         this->pd->maybe_modified = true;
-         return this->pd->data;
-      }
-
-      operator bool()
-      {
-         return this->pd;
-      }
-
-      template< typename Device = Devices::Host >
-      const Object& getData() const
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
-         if( std::is_same< Device, Devices::Host >::value )
-            return this->pd->data;
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-      }
-
-      template< typename Device = Devices::Host >
-      Object& modifyData()
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
-         if( std::is_same< Device, Devices::Host >::value )
-         {
-            this->pd->maybe_modified = true;
-            return this->pd->data;
-         }
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-      }
-
-      const UniquePointer& operator=( UniquePointer& ptr )
-      {
-         this->free();
-         this->pd = ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-         return *this;
-      }
-
-      const UniquePointer& operator=( UniquePointer&& ptr )
-      {
-         return this->operator=( ptr );
-      }
-
-      bool synchronize()
-      {
-         if( ! this->pd )
-            return true;
-         if( this->modified() )
-         {
-            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
-            this->set_last_sync_state();
-            return true;
-         }
-         return true;//??
-      }
-
-      ~UniquePointer()
-      {
-         this->free();
-         Devices::MIC::removeSmartPointer( this );
-      }
-
-   protected:
-
-      struct PointerData
-      {
-         Object data;
-         char data_image[ sizeof(Object) ];
-         bool maybe_modified;
-
-         template< typename... Args >
-         explicit PointerData( Args... args )
-         : data( args... ),
-           maybe_modified( false )
-         {}
-      };
-
-      template< typename... Args >
-      bool allocate( Args... args )
-      {
-         this->pd = new PointerData( args... );
-         if( ! this->pd )
-            return false;
-         // pass to device
-         this->mic_pointer = Allocators::MIC< Object >().allocate(1);
-         if( ! this->mic_pointer )
-            return false;
-         Devices::MIC::CopyToMIC((void*)mic_pointer,(void*)&this->pd->data,sizeof(Object));
-         // set last-sync state
-         this->set_last_sync_state();
-         Devices::MIC::insertSmartPointer( this );
-         return true;
-      }
-
-      void set_last_sync_state()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) );
-         this->pd->maybe_modified = false;
-      }
-
-      bool modified()
-      {
-         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
-         // optimization: skip bitwise comparison if we're sure that the data is the same
-         if( ! this->pd->maybe_modified )
-            return false;
-         return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) ) != 0;
-      }
-
-      void free()
-      {
-         if( this->pd )
-            delete this->pd;
-         if( this->mic_pointer )
-            Allocators:::MIC< ObjectType >().deallocate(mic_pointer, 1);
-      }
-
-      PointerData* pd;
-
-      // mic_pointer can't be part of PointerData structure, since we would be
-      // unable to dereference this-pd on the device
-      Object* mic_pointer;
-};
-#endif
-
 } // namespace Pointers
 
-#if (!defined(NDEBUG)) && (!defined(HAVE_MIC))
+#ifndef NDEBUG
 namespace Assert {
 
 template< typename Object, typename Device >
@@ -501,7 +324,7 @@ struct Formatter< Pointers::UniquePointer< Object, Device > >
    printToString( const Pointers::UniquePointer< Object, Device >& value )
    {
       ::std::stringstream ss;
-      ss << "(UniquePointer< " << Object::getType() << ", " << Device::getDeviceType()
+      ss << "(" + getType< Pointers::UniquePointer< Object, Device > >()
          << " > object at " << &value << ")";
       return ss.str();
    }
diff --git a/src/TNL/Problems/HeatEquationEocProblem.h b/src/TNL/Problems/HeatEquationEocProblem.h
index 51990252ca731252a5c363870f7076fbe224363f..78dd640b353e302f3a97f8dd381db297e8b8483a 100644
--- a/src/TNL/Problems/HeatEquationEocProblem.h
+++ b/src/TNL/Problems/HeatEquationEocProblem.h
@@ -36,8 +36,6 @@ class HeatEquationEocProblem : public HeatEquationProblem< Mesh, BoundaryConditi
       
       using typename BaseType::MeshPointer;
 
-      static String getType();
-
       bool setup( const Config::ParameterContainer& parameters,
                   const String& prefix );
 };
diff --git a/src/TNL/Problems/HeatEquationEocProblem_impl.h b/src/TNL/Problems/HeatEquationEocProblem_impl.h
index ae062df74ec825f124961b5ff33f2223c32b7d54..f7c7aea5cd2a6604683346cb7a65db3b8be6ffab 100644
--- a/src/TNL/Problems/HeatEquationEocProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationEocProblem_impl.h
@@ -20,19 +20,7 @@
 #include "HeatEquationProblem.h"
 
 namespace TNL {
-namespace Problems {   
-
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-String
-HeatEquationEocProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
-getType()
-{
-   return String( "heatEquationEocSolver< " ) + Mesh :: getType() + " >";
-}
+namespace Problems {
 
 template< typename Mesh,
           typename BoundaryCondition,
diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h
index cddd70746a295378450ab2b1cee16976587b0f83..26df28965ec42e855fd034de7dea748999381e67 100644
--- a/src/TNL/Problems/HeatEquationProblem.h
+++ b/src/TNL/Problems/HeatEquationProblem.h
@@ -62,8 +62,6 @@ class HeatEquationProblem : public PDEProblem< Mesh,
 
       typedef Communicator CommunicatorType;
 
-      static String getType();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h
index 64b4a2ca91f34b385961808dc89a6909686da9c2..bc339e9b3ba56eb9e4d3499d4954be57cda7d864 100644
--- a/src/TNL/Problems/HeatEquationProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationProblem_impl.h
@@ -27,18 +27,6 @@
 namespace TNL {
 namespace Problems {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-String
-HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
-getType()
-{
-   return String( "HeatEquationProblem< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/TNL/Problems/MeanCurvatureFlowEocProblem.h b/src/TNL/Problems/MeanCurvatureFlowEocProblem.h
index e50afe7faf31327da9b84cda4661812280f469b6..7839dd8dda2e1e7115007126654e7249ceff23e3 100644
--- a/src/TNL/Problems/MeanCurvatureFlowEocProblem.h
+++ b/src/TNL/Problems/MeanCurvatureFlowEocProblem.h
@@ -33,8 +33,6 @@ class MeanCurvatureFlowEocProblem : public MeanCurvatureFlowProblem< Mesh, Bound
 {
    public:
 
-      static String getType();
-
       bool setup( const Config::ParameterContainer& parameters );
 };
 
diff --git a/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h
index 593028b3732dce7b4ead51af4e29feb8f85f5d56..71809f3cd4c7aeec164900aa5c217802a7d42435 100644
--- a/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h
+++ b/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h
@@ -19,17 +19,6 @@
 namespace TNL {
 namespace Problems {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename DifferentialOperator >
-String
-MeanCurvatureFlowEocProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
-getType()
-{
-   return String( "HeatEquationEocProblem< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
@@ -47,4 +36,4 @@ setup( const Config::ParameterContainer& parameters )
 }
 
 } // namespace Problems
-} // namespace TNL
\ No newline at end of file
+} // namespace TNL
diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem.h b/src/TNL/Problems/MeanCurvatureFlowProblem.h
index 45e87025bd246d6c124215f0ef365030445f8a2c..415216dcea3e31474ac84ff75be57a04acc6a4e7 100644
--- a/src/TNL/Problems/MeanCurvatureFlowProblem.h
+++ b/src/TNL/Problems/MeanCurvatureFlowProblem.h
@@ -54,8 +54,6 @@ class MeanCurvatureFlowProblem : public PDEProblem< Mesh,
       using typename BaseType::MeshDependentDataType;
       using typename BaseType::MeshDependentDataPointer;
 
-      static String getType();
-
       String getPrologHeader() const;
 
       void writeProlog( Logger& logger,
diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h
index 2cd7f9aaf123a32928b1d545b5390743972c4ec3..48807addfbe91837f55f47a4cb9fe60dafe3b023 100644
--- a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h
+++ b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h
@@ -30,17 +30,6 @@
 namespace TNL {
 namespace Problems {
 
-template< typename Mesh,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename DifferentialOperator >
-String
-MeanCurvatureFlowProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
-getType()
-{
-   return String( "tnlMeanCurvativeFlowProblem< " ) + Mesh :: getType() + " >";
-}
-
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h
index 51e56d44aa47e35689a46543e37d3c23cc9f2a7b..69d95aaeee7e5db273940602b7f192c0b75b2591 100644
--- a/src/TNL/Problems/PDEProblem.h
+++ b/src/TNL/Problems/PDEProblem.h
@@ -50,8 +50,6 @@ class PDEProblem : public Problem< Real, Device, Index >
        * This means that the time stepper will be set from the command line arguments.
        */
       typedef void TimeStepper;
-      
-      static String getType();
 
       String getPrologHeader() const;
 
diff --git a/src/TNL/Problems/PDEProblem_impl.h b/src/TNL/Problems/PDEProblem_impl.h
index 151f1e2ac553e212d2f7adf64c99c73fbad1bf3a..6a3aa63e6d82bce68b9f549b413d275504f137aa 100644
--- a/src/TNL/Problems/PDEProblem_impl.h
+++ b/src/TNL/Problems/PDEProblem_impl.h
@@ -16,22 +16,6 @@
 namespace TNL {
 namespace Problems {
 
-template< typename Mesh,
-          typename Communicator,
-          typename Real,
-          typename Device,
-          typename Index >
-String
-PDEProblem< Mesh, Communicator, Real, Device, Index >::
-getType()
-{
-   return String( "PDEProblem< " ) +
-          Mesh::getType() + ", " +
-          TNL::getType< Real >() + ", " +
-          Device::getDeviceType() + ", " +
-          TNL::getType< Index >() + " >";
-}
-
 template< typename Mesh,
           typename Communicator,
           typename Real,
diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h
index e725eb67ffd75b148cf3368029f51f14703b4f50..1ea084f4f172ce3aa33bdcc7adb16888a3d65fdb 100644
--- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h
+++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h
@@ -34,8 +34,6 @@ class NavierStokesSolver
 
    NavierStokesSolver();
 
-   static String getType();
-
    void setAdvectionScheme( AdvectionSchemeType& advection );
 
    void setDiffusionScheme( DiffusionSchemeType& u1Viscosity,
@@ -148,4 +146,4 @@ class NavierStokesSolver
 
 } // namespace TNL
 
-#include <TNL/Solvers/cfd/navier-stokes/NavierStokesSolver_impl.h>
\ No newline at end of file
+#include <TNL/Solvers/cfd/navier-stokes/NavierStokesSolver_impl.h>
diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h
index a12ec5cb100f1fe16e9f089ff42bc4da671e6bd0..a266938862c6f253a2318e8a0bab7b27d2ec2059 100644
--- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h
+++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h
@@ -29,16 +29,6 @@ NavierStokesSolver< AdvectionScheme, DiffusionScheme, BoundaryConditions >::Navi
 {
 }
 
-template< typename AdvectionScheme,
-          typename DiffusionScheme,
-          typename BoundaryConditions >
-String NavierStokesSolver< AdvectionScheme, DiffusionScheme, BoundaryConditions >::getType()
-{
-   return String( "NavierStokesSolver< " ) +
-          AdvectionScheme::getType() + ", " +
-          DiffusionScheme::getType() + " >";
-}
-
 template< typename AdvectionScheme,
           typename DiffusionScheme,
           typename BoundaryConditions >
diff --git a/src/TNL/Solvers/BuildConfigTags.h b/src/TNL/Solvers/BuildConfigTags.h
index 19bb42129563b48a7e1e1ba9baf8f107d25ee661..bcd4cdafcacff729b51b827348bcd7703f4bec21 100644
--- a/src/TNL/Solvers/BuildConfigTags.h
+++ b/src/TNL/Solvers/BuildConfigTags.h
@@ -27,10 +27,6 @@ template< typename ConfigTag, typename Device > struct ConfigTagDevice{ enum { e
 template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::Cuda >{ enum { enabled = false }; };
 #endif
 
-#ifndef HAVE_MIC
-template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::MIC >{ enum { enabled = false }; };
-#endif
-
 /****
  * All real types are enabled by default.
  */
diff --git a/src/TNL/Solvers/Linear/BICGStab.h b/src/TNL/Solvers/Linear/BICGStab.h
index 686d6f4503d2cfc7d73fd74482c670f56db9a793..2cede824ad00c4ea8b4cb2f270d86882f5bfcfe3 100644
--- a/src/TNL/Solvers/Linear/BICGStab.h
+++ b/src/TNL/Solvers/Linear/BICGStab.h
@@ -28,8 +28,6 @@ public:
    using VectorViewType = typename Base::VectorViewType;
    using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   String getType() const;
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
 
diff --git a/src/TNL/Solvers/Linear/BICGStabL.h b/src/TNL/Solvers/Linear/BICGStabL.h
index a35962d54b900ae0a50dfe1f42ff04d9235fc3a8..f2481b588bc92722e9795fea2a91b8676e5a307d 100644
--- a/src/TNL/Solvers/Linear/BICGStabL.h
+++ b/src/TNL/Solvers/Linear/BICGStabL.h
@@ -65,8 +65,6 @@ public:
    using ConstVectorViewType = typename Base::ConstVectorViewType;
    using VectorType = typename Traits::VectorType;
 
-   String getType() const;
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
 
diff --git a/src/TNL/Solvers/Linear/BICGStabL_impl.h b/src/TNL/Solvers/Linear/BICGStabL_impl.h
index 1f20d4a30e582d4565bcf15ccb4fdaa07c91947e..3f41e5115d0043b168bb9425a567bf02c97159b0 100644
--- a/src/TNL/Solvers/Linear/BICGStabL_impl.h
+++ b/src/TNL/Solvers/Linear/BICGStabL_impl.h
@@ -20,16 +20,6 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix >
-String
-BICGStabL< Matrix >::
-getType() const
-{
-   return String( "BICGStabL< " ) +
-          this->matrix -> getType() + ", " +
-          this->preconditioner -> getType() + " >";
-}
-
 template< typename Matrix >
 void
 BICGStabL< Matrix >::
diff --git a/src/TNL/Solvers/Linear/BICGStab_impl.h b/src/TNL/Solvers/Linear/BICGStab_impl.h
index 735358622bd9875e825019793e0237f488f20bb4..baa4b6363e712ec4156e7a4bc79bc6e32bcc031c 100644
--- a/src/TNL/Solvers/Linear/BICGStab_impl.h
+++ b/src/TNL/Solvers/Linear/BICGStab_impl.h
@@ -18,14 +18,6 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix >
-String BICGStab< Matrix > :: getType() const
-{
-   return String( "BICGStab< " ) +
-          this->matrix -> getType() + ", " +
-          this->preconditioner -> getType() + " >";
-}
-
 template< typename Matrix >
 void
 BICGStab< Matrix >::
diff --git a/src/TNL/Solvers/Linear/CG.h b/src/TNL/Solvers/Linear/CG.h
index b87caf24784affb9e425fd7ca7748134995d10e0..375db25cb9db4e4d9c9b4e253ba72ecbb1923f0f 100644
--- a/src/TNL/Solvers/Linear/CG.h
+++ b/src/TNL/Solvers/Linear/CG.h
@@ -30,8 +30,6 @@ public:
    using VectorViewType = typename Base::VectorViewType;
    using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   String getType() const;
-
    bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
 protected:
diff --git a/src/TNL/Solvers/Linear/CG_impl.h b/src/TNL/Solvers/Linear/CG_impl.h
index 07f8ea1979a691029a34fecfe1e4cf052d1a0f73..9c1b0458aab7fe0566767717826182c7f034bf02 100644
--- a/src/TNL/Solvers/Linear/CG_impl.h
+++ b/src/TNL/Solvers/Linear/CG_impl.h
@@ -16,14 +16,6 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix >
-String CG< Matrix > :: getType() const
-{
-   return String( "CG< " ) +
-          this->matrix -> getType() + ", " +
-          this->preconditioner -> getType() + " >";
-}
-
 template< typename Matrix >
 bool
 CG< Matrix >::
diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h
index dd72e2832af81b65b9fdcc8d19090d669232bc62..f1a4b87328a630411c634bed6744494f76afea02 100644
--- a/src/TNL/Solvers/Linear/GMRES.h
+++ b/src/TNL/Solvers/Linear/GMRES.h
@@ -37,8 +37,6 @@ public:
    using ConstVectorViewType = typename Base::ConstVectorViewType;
    using VectorType = typename Traits::VectorType;
 
-   String getType() const;
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
 
@@ -51,9 +49,9 @@ protected:
    // local vectors/views
    using ConstDeviceView = typename Traits::ConstLocalViewType;
    using DeviceView = typename Traits::LocalViewType;
-   using HostView = typename DeviceView::HostType;
    using DeviceVector = typename Traits::LocalVectorType;
-   using HostVector = typename DeviceVector::HostType;
+   using HostView = typename DeviceView::template Self< RealType, Devices::Host >;
+   using HostVector = typename DeviceVector::template Self< RealType, Devices::Host >;
 
    enum class Variant { MGS, MGSR, CWY };
 
diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h
index 5d7942f841100221bb3d16c807179c17468e604b..d6cb8fdd095120c35cbf303a734ed7c5667bb79d 100644
--- a/src/TNL/Solvers/Linear/GMRES_impl.h
+++ b/src/TNL/Solvers/Linear/GMRES_impl.h
@@ -15,7 +15,7 @@
 #include <type_traits>
 #include <cmath>
 
-#include <TNL/Containers/Algorithms/Multireduction.h>
+#include <TNL/Algorithms/Multireduction.h>
 #include <TNL/Matrices/MatrixOperations.h>
 
 #include "GMRES.h"
@@ -24,16 +24,6 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix >
-String
-GMRES< Matrix >::
-getType() const
-{
-   return String( "GMRES< " ) +
-          this->matrix -> getType() + ", " +
-          this->preconditioner -> getType() + " >";
-}
-
 template< typename Matrix >
 void
 GMRES< Matrix >::
@@ -390,7 +380,7 @@ hauseholder_generate( const int i,
          else
             y_i[ j ] = z[ j ];
       };
-      ParallelFor< DeviceType >::exec( (IndexType) 0, size, kernel_truncation );
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, size, kernel_truncation );
    }
    else {
       ConstDeviceView z_local = Traits::getConstLocalView( z );
@@ -430,7 +420,7 @@ hauseholder_generate( const int i,
       const RealType* _y_i = Traits::getConstLocalView( y_i ).getData();
       const IndexType ldSize = this->ldSize;
       auto fetch = [_Y, _y_i, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _Y[ idx + k * ldSize ] * _y_i[ idx ]; };
-      Containers::Algorithms::Multireduction< DeviceType >::reduce
+      Algorithms::Multireduction< DeviceType >::reduce
                ( (RealType) 0,
                  fetch,
                  std::plus<>{},
@@ -461,7 +451,7 @@ hauseholder_apply_trunc( HostView out,
    // The upper (m+1)x(m+1) submatrix of Y is duplicated in the YL buffer,
    // which resides on host and is broadcasted from rank 0 to all processes.
    HostView YL_i( &YL[ i * (restarting_max + 1) ], restarting_max + 1 );
-   Containers::Algorithms::ArrayOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() );
+   Algorithms::MultiDeviceMemoryOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() );
    // no-op if the problem is not distributed
    CommunicatorType::Bcast( YL_i.getData(), YL_i.getSize(), 0, Traits::getCommunicationGroup( *this->matrix ) );
 
@@ -476,7 +466,7 @@ hauseholder_apply_trunc( HostView out,
       }
       if( std::is_same< DeviceType, Devices::Cuda >::value ) {
          RealType host_z[ i + 1 ];
-         Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 );
+         Algorithms::MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 );
          for( int k = 0; k <= i; k++ )
             out[ k ] = host_z[ k ] - YL_i[ k ] * aux;
       }
@@ -530,7 +520,7 @@ hauseholder_cwy_transposed( VectorViewType z,
    const RealType* _w = Traits::getConstLocalView( w ).getData();
    const IndexType ldSize = this->ldSize;
    auto fetch = [_Y, _w, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _Y[ idx + k * ldSize ] * _w[ idx ]; };
-   Containers::Algorithms::Multireduction< DeviceType >::reduce
+   Algorithms::Multireduction< DeviceType >::reduce
             ( (RealType) 0,
               fetch,
               std::plus<>{},
diff --git a/src/TNL/Solvers/Linear/Jacobi.h b/src/TNL/Solvers/Linear/Jacobi.h
index 5288726713525cbc5911d497d219ee266bd89243..e4e74d5dff6c7037278e0455303c6713ed26276f 100644
--- a/src/TNL/Solvers/Linear/Jacobi.h
+++ b/src/TNL/Solvers/Linear/Jacobi.h
@@ -29,11 +29,6 @@ public:
    using VectorViewType = typename Base::VectorViewType;
    using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   String getType() const
-   {
-      return String( "Jacobi< " ) + this->matrix->getType() + ", " + this->preconditioner->getType() + " >";
-   }
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" )
    {
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h
index 25aa1cd7c7aff9d65c863a0f3a8e1e2f578ef1cb..f88e315ccf734a12ec20e53fb930016aa0330b36 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h
@@ -38,11 +38,6 @@ public:
 
    virtual void solve( ConstVectorViewType b, VectorViewType x ) const override;
 
-   String getType() const
-   {
-      return String( "Diagonal" );
-   }
-
 protected:
    VectorType diagonal;
 };
@@ -67,11 +62,6 @@ public:
 
    virtual void solve( ConstVectorViewType b, VectorViewType x ) const override;
 
-   String getType() const
-   {
-      return String( "Diagonal" );
-   }
-
 protected:
    VectorType diagonal;
 };
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
index de4b9f3f1ee84b668d4daf2a6515ddf102973d95..c9751fe4f89f60a6115947aede1279da8dfcdb5f 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
@@ -14,7 +14,7 @@
 
 #include "Diagonal.h"
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Solvers {
@@ -39,7 +39,7 @@ update( const MatrixPointer& matrixPointer )
       diag_view[ i ] = kernel_matrix->getElementFast( i, i );
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
 }
 
 template< typename Matrix >
@@ -54,7 +54,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const
       x[ i ] = b[ i ] / diag_view[ i ];
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
 }
 
 
@@ -77,7 +77,7 @@ update( const MatrixPointer& matrixPointer )
       diag_view[ i ] = kernel_matrix->getLocalMatrix().getElementFast( i, gi );
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
 }
 
 template< typename Matrix, typename Communicator >
@@ -94,7 +94,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const
       x_view[ i ] = b_view[ i ] / diag_view[ i ];
    };
 
-   ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
 }
 
 } // namespace Preconditioners
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
index 1fc2fa3fa69f964cb3486d6ee16dcf43fc8d3b9f..8a177df055c682ead7b7037aae4772ae2d1ef1ab 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
@@ -37,13 +37,7 @@ class ILU0_impl
 template< typename Matrix >
 class ILU0
 : public ILU0_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >
-{
-public:
-   String getType() const
-   {
-      return String( "ILU0" );
-   }
-};
+{};
 
 template< typename Matrix, typename Real, typename Index >
 class ILU0_impl< Matrix, Real, Devices::Host, Index >
@@ -199,29 +193,6 @@ public:
    }
 };
 
-template< typename Matrix, typename Real, typename Index >
-class ILU0_impl< Matrix, Real, Devices::MIC, Index >
-: public Preconditioner< Matrix >
-{
-public:
-   using RealType = Real;
-   using DeviceType = Devices::MIC;
-   using IndexType = Index;
-   using typename Preconditioner< Matrix >::VectorViewType;
-   using typename Preconditioner< Matrix >::ConstVectorViewType;
-   using typename Preconditioner< Matrix >::MatrixPointer;
-
-   virtual void update( const MatrixPointer& matrixPointer ) override
-   {
-      throw Exceptions::NotImplementedError("Not Iplemented yet for MIC");
-   }
-
-   virtual void solve( ConstVectorViewType b, VectorViewType x ) const override
-   {
-      throw Exceptions::NotImplementedError("Not Iplemented yet for MIC");
-   }
-};
-
 } // namespace Preconditioners
 } // namespace Linear
 } // namespace Solvers
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
index 626469920ff9e08d7f935e13017086b7cd583081..5ae255304f89eebb7a97fe2bfeac7ebc82b9c765 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
@@ -16,7 +16,7 @@
 #include "TriangularSolve.h"
 
 #include <TNL/Exceptions/CudaSupportMissing.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Solvers {
@@ -282,7 +282,7 @@ allocate_LU()
    U->setDimensions( N, N );
 
    // extract raw pointer
-   Devices::Cuda::synchronizeDevice();
+   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
    const CSR* kernel_A = &A.template getData< DeviceType >();
 
    // copy row lengths
@@ -308,7 +308,7 @@ allocate_LU()
       L_rowLengths_view[ i ] = L_entries;
       U_rowLengths_view[ i ] = U_entries;
    };
-   ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths );
    L->setCompressedRowLengths( L_rowLengths );
    U->setCompressedRowLengths( U_rowLengths );
 #else
@@ -329,7 +329,7 @@ copy_triangular_factors()
    const int N = A->getRows();
 
    // extract raw pointers
-   Devices::Cuda::synchronizeDevice();
+   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
    CSR* kernel_L = &L.template modifyData< DeviceType >();
    CSR* kernel_U = &U.template modifyData< DeviceType >();
    const CSR* kernel_A = &A.template getData< DeviceType >();
@@ -349,7 +349,7 @@ copy_triangular_factors()
             break;
       }
    };
-   ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_values );
+   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_values );
 #else
    throw std::runtime_error("The program was not compiled with the CUSPARSE library. Pass -DHAVE_CUSPARSE -lcusparse to the compiler.");
 #endif
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
index 8f4c27d7abd8d65566916b7ac79f34d269bc84e1..cce3dc5c4bde030dc33c4762623124e1d3f65367 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
@@ -33,11 +33,6 @@ class ILUT
 : public ILUT_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >
 {
 public:
-   String getType() const
-   {
-      return String( "ILUT" );
-   }
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" )
    {
@@ -111,29 +106,6 @@ public:
    }
 };
 
-template< typename Matrix, typename Real, typename Index >
-class ILUT_impl< Matrix, Real, Devices::MIC, Index >
-: public Preconditioner< Matrix >
-{
-public:
-   using RealType = Real;
-   using DeviceType = Devices::MIC;
-   using IndexType = Index;
-   using typename Preconditioner< Matrix >::VectorViewType;
-   using typename Preconditioner< Matrix >::ConstVectorViewType;
-   using typename Preconditioner< Matrix >::MatrixPointer;
-
-   virtual void update( const MatrixPointer& matrixPointer ) override
-   {
-      throw std::runtime_error("Not Iplemented yet for MIC");
-   }
-
-   virtual void solve( ConstVectorViewType b, VectorViewType x ) const override
-   {
-      throw std::runtime_error("Not Iplemented yet for MIC");
-   }
-};
-
 } // namespace Preconditioners
 } // namespace Linear
 } // namespace Solvers
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h
index 2e70be2b1b044ee45585ef689d443db1d1e8a8c1..67a62e74f1f2fc53203be81897163eba84cfc7ea 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h
@@ -55,11 +55,6 @@ public:
       throw std::logic_error("The solve() method of a dummy preconditioner should not be called.");
    }
 
-   String getType() const
-   {
-      return String( "Preconditioner" );
-   }
-
    virtual ~Preconditioner() {}
 };
 
diff --git a/src/TNL/Solvers/Linear/SOR.h b/src/TNL/Solvers/Linear/SOR.h
index 7e94634cdf178abf9df838e533529c44ea5fb2cc..0d9aae433324aa3fd1346237c851fc5f6a192225 100644
--- a/src/TNL/Solvers/Linear/SOR.h
+++ b/src/TNL/Solvers/Linear/SOR.h
@@ -28,8 +28,6 @@ public:
    using VectorViewType = typename Base::VectorViewType;
    using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   String getType() const;
-
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
 
diff --git a/src/TNL/Solvers/Linear/SOR_impl.h b/src/TNL/Solvers/Linear/SOR_impl.h
index 648ae8d419643eb170788c701da161040fa7220d..4a7d4fb9d3784e7079505405ad750def3e647630 100644
--- a/src/TNL/Solvers/Linear/SOR_impl.h
+++ b/src/TNL/Solvers/Linear/SOR_impl.h
@@ -17,14 +17,6 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix >
-String SOR< Matrix > :: getType() const
-{
-   return String( "SOR< " ) +
-          this->matrix -> getType() + ", " +
-          this->preconditioner -> getType() + " >";
-}
-
 template< typename Matrix >
 void
 SOR< Matrix >::
diff --git a/src/TNL/Solvers/Linear/TFQMR.h b/src/TNL/Solvers/Linear/TFQMR.h
index 73d0894aada0cc311146d6fff686fefd2e934e3e..2a94f44e7c94413edbe54203d87227ded8cc6983 100644
--- a/src/TNL/Solvers/Linear/TFQMR.h
+++ b/src/TNL/Solvers/Linear/TFQMR.h
@@ -28,8 +28,6 @@ public:
    using VectorViewType = typename Base::VectorViewType;
    using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   String getType() const;
-
    bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
 protected:
diff --git a/src/TNL/Solvers/Linear/TFQMR_impl.h b/src/TNL/Solvers/Linear/TFQMR_impl.h
index 590aa35af0b9c9cf8cf73a0c66e02a265a1bacdf..0ea03e83c5fc06b41ef0bdb1e11e9fa3a6a4d164 100644
--- a/src/TNL/Solvers/Linear/TFQMR_impl.h
+++ b/src/TNL/Solvers/Linear/TFQMR_impl.h
@@ -18,14 +18,6 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix >
-String TFQMR< Matrix > :: getType() const
-{
-   return String( "TFQMR< " ) +
-          this->matrix -> getType() + ", " +
-          this->preconditioner -> getType() + " >";
-}
-
 template< typename Matrix >
 bool TFQMR< Matrix >::solve( ConstVectorViewType b, VectorViewType x )
 {
diff --git a/src/TNL/Solvers/Linear/UmfpackWrapper.h b/src/TNL/Solvers/Linear/UmfpackWrapper.h
index 1d4e67ea2771e6d389eb4e84ca199423d927fd16..0e2e5d7ac78a9d6264f1b4448beaf5728e7dce7f 100644
--- a/src/TNL/Solvers/Linear/UmfpackWrapper.h
+++ b/src/TNL/Solvers/Linear/UmfpackWrapper.h
@@ -81,8 +81,6 @@ public:
    using VectorViewType = typename Base::VectorViewType;
    using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   String getType() const;
-
    bool solve( ConstVectorViewType b, VectorViewType x ) override;
 };
 
diff --git a/src/TNL/Solvers/ODE/Euler.h b/src/TNL/Solvers/ODE/Euler.h
index 2ba128073ec65aba4f8e1bc5c7f6cad661f67303..1fd6ab3c11a8a786f9a713e11082d69ae9912f36 100644
--- a/src/TNL/Solvers/ODE/Euler.h
+++ b/src/TNL/Solvers/ODE/Euler.h
@@ -10,12 +10,10 @@
 
 #pragma once
 
-#include <math.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Solvers/ODE/ExplicitSolver.h>
 #include <TNL/Solvers/DummyProblem.h>
 #include <TNL/Config/ParameterContainer.h>
-#include <TNL/Timer.h>
 
 namespace TNL {
 namespace Solvers {
@@ -37,8 +35,6 @@ class Euler : public ExplicitSolver< Problem, SolverMonitor >
 
       Euler();
 
-      static String getType();
-
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" );
 
diff --git a/src/TNL/Solvers/ODE/Euler.hpp b/src/TNL/Solvers/ODE/Euler.hpp
index 12da6439bd15d4fdbe1e0a088910c940cfc90aa2..9dc6b6570329e2684e359c109dc6eb969ee0b9e7 100644
--- a/src/TNL/Solvers/ODE/Euler.hpp
+++ b/src/TNL/Solvers/ODE/Euler.hpp
@@ -10,9 +10,7 @@
 
 #pragma once
 
-#include <TNL/Devices/MIC.h>
-#include <TNL/Communicators/MpiCommunicator.h>
-#include <TNL/Communicators/NoDistrCommunicator.h>
+#include <TNL/Solvers/ODE/Euler.h>
 
 namespace TNL {
 namespace Solvers {
@@ -33,14 +31,6 @@ Euler< Problem, SolverMonitor > :: Euler()
 {
 };
 
-template< typename Problem, typename SolverMonitor >
-String Euler< Problem, SolverMonitor > :: getType()
-{
-   return String( "Euler< " ) +
-          Problem :: getType() +
-          String( " >" );
-};
-
 template< typename Problem, typename SolverMonitor >
 void Euler< Problem, SolverMonitor > :: configSetup( Config::ConfigDescription& config,
                                                const String& prefix )
@@ -77,7 +67,6 @@ bool Euler< Problem, SolverMonitor > :: solve( DofVectorPointer& _u )
    /****
     * First setup the supporting meshes k1...k5 and k_tmp.
     */
-   //timer.start();
    _k1->setLike( *_u );
    auto k1 = _k1->getView();
    auto u = _u->getView();
@@ -104,9 +93,7 @@ bool Euler< Problem, SolverMonitor > :: solve( DofVectorPointer& _u )
       /****
        * Compute the RHS
        */
-      //timer.stop();
       this->problem->getExplicitUpdate( time, currentTau, _u, _k1 );
-      //timer.start();
 
       RealType lastResidue = this->getResidue();
       RealType maxResidue( 0.0 );
diff --git a/src/TNL/Solvers/ODE/Merson.h b/src/TNL/Solvers/ODE/Merson.h
index 3ac978178cfe7f42050199f5cf9e8d722c504015..99ffc2409ed67540be41a48c9ab030fd3756d705 100644
--- a/src/TNL/Solvers/ODE/Merson.h
+++ b/src/TNL/Solvers/ODE/Merson.h
@@ -35,8 +35,6 @@ class Merson : public ExplicitSolver< Problem, SolverMonitor >
 
       Merson();
 
-      static String getType();
-
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" );
 
diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h
index 3c88576e9e583c4c161782f630630573bd23be3a..4c7b21bc93c5bcfb5adff76e89d876778f1049aa 100644
--- a/src/TNL/Solvers/ODE/Merson_impl.h
+++ b/src/TNL/Solvers/ODE/Merson_impl.h
@@ -40,14 +40,6 @@ Merson< Problem, SolverMonitor >::Merson()
    }
 };
 
-template< typename Problem, typename SolverMonitor >
-String Merson< Problem, SolverMonitor >::getType()
-{
-   return String( "Merson< " ) +
-          Problem::getType() +
-          String( " >" );
-};
-
 template< typename Problem, typename SolverMonitor >
 void Merson< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config,
                                                 const String& prefix )
diff --git a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h
index a28a64cf5567eaee40f5d4efca3fd24af3dd2819..d1b871c25c0a998b68a770d4c0629ad76d20dfb5 100644
--- a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h
+++ b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h
@@ -11,7 +11,7 @@
 
 #pragma once
 
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CudaCallable.h>
 #include <TNL/Functions/FunctionAdapter.h>
 #include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Meshes/Traverser.h>
diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h
index 8a5f0db1e8f0effe43951a610e67bcd8d47d6548..d4f6992b37e27cccecf38a193be8435ea2d9fe96 100644
--- a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h
+++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h
@@ -42,8 +42,6 @@ class ExplicitTimeStepper
 
       static_assert( ProblemType::isTimeDependent(), "The problem is not time dependent." );
 
-      static String getType();
-      
       ExplicitTimeStepper();
 
       static void configSetup( Config::ConfigDescription& config,
diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
index 4024ff32671bef44ba72405e578c5c17067ca7e9..fa2d1f8066ed22ebc2626e93ff88deb1e6790177 100644
--- a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
+++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
@@ -14,21 +14,8 @@
 
 namespace TNL {
 namespace Solvers {
-namespace PDE {   
+namespace PDE {
 
-template< typename Problem,
-          template < typename OdeProblem, typename SolverMonitor > class OdeSolver >
-String
-ExplicitTimeStepper< Problem, OdeSolver >::
-getType()
-{
-   return String( "ExplicitTimeStepper< " ) +
-          Problem::getType() + ", " +
-          OdeSolverType::getType() + ", " +
-          String( " >" );
-};
-   
-   
 template< typename Problem,
           template < typename OdeProblem, typename SolverMonitor > class OdeSolver >
 ExplicitTimeStepper< Problem, OdeSolver >::
@@ -37,7 +24,7 @@ ExplicitTimeStepper()
   timeStep( 0 ),
   allIterations( 0 )
 {
-};
+}
 
 template< typename Problem,
           template < typename OdeProblem, typename SolverMonitor > class OdeSolver >
diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h
index 701c5eb730b99e2487e56bd5e56a9ffdec0b916d..e5673d5c1ed45ea9a28f8615cd4f099284bb8875 100644
--- a/src/TNL/Solvers/SolverConfig_impl.h
+++ b/src/TNL/Solvers/SolverConfig_impl.h
@@ -67,12 +67,6 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    if( ConfigTagDevice< ConfigTag, Devices::Cuda >::enabled )
       config.addEntryEnum( "cuda" );
 #endif
-   
-#ifdef HAVE_MIC
-   if( ConfigTagDevice< ConfigTag, Devices::MIC >::enabled )
-      config.addEntryEnum( "mic" );
-#endif
-   
 
    /****
     * Setup index type.
diff --git a/src/TNL/Solvers/SolverInitiator_impl.h b/src/TNL/Solvers/SolverInitiator_impl.h
index c6bc5ca7f494abd8922f1a0fcb45b4814277094f..e54a8fe308c4478a7242a24f8032473be8431d1e 100644
--- a/src/TNL/Solvers/SolverInitiator_impl.h
+++ b/src/TNL/Solvers/SolverInitiator_impl.h
@@ -12,7 +12,6 @@
 
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Meshes/TypeResolver/TypeResolver.h>
 #include <TNL/Solvers/BuildConfigTags.h>
@@ -92,8 +91,6 @@ class SolverInitiatorRealResolver< ProblemSetter, Real, ConfigTag, true >
             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Host, ConfigTag >::run( parameters );
          if( device == "cuda" )
             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Cuda, ConfigTag >::run( parameters );
-         if(device == "mic")
-             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::MIC, ConfigTag >::run( parameters );
          std::cerr << "The device '" << device << "' is not defined. " << std::endl;
          return false;
       }
diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h
index e52d03a4f7377c5fc80ade50d7981a3a07a48f08..8b323d5d745e08216bc3477adc03d3495bce8dce 100644
--- a/src/TNL/Solvers/SolverStarter_impl.h
+++ b/src/TNL/Solvers/SolverStarter_impl.h
@@ -406,7 +406,7 @@ bool SolverStarter< ConfigTag > :: writeEpilog( std::ostream& str, const Solver&
    if( std::is_same< typename Solver::DeviceType, TNL::Devices::Cuda >::value )
    {
       logger.writeParameter< const char* >( "GPU synchronization time:", "" );
-      TNL::Devices::Cuda::getSmartPointersSynchronizationTimer().writeLog( logger, 1 );
+      Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().writeLog( logger, 1 );
    }
    logger.writeParameter< const char* >( "I/O time:", "" );
    this->ioTimer.writeLog( logger, 1 );
diff --git a/src/TNL/String.h b/src/TNL/String.h
index a04802216c2dd14a6495b30146a49066f708820e..f35abc377177b6b061b68074714ce3e143b55d22 100644
--- a/src/TNL/String.h
+++ b/src/TNL/String.h
@@ -21,8 +21,6 @@
 
 namespace TNL {
 
-class String;
-
 /**
  * \brief Class for managing strings.
  *
@@ -39,8 +37,6 @@ class String;
  * 
  * \ref operator+
  * 
- * \ref operator<<
- * 
  * \ref mpiSend
  * 
  * \ref mpiReceive
@@ -101,11 +97,6 @@ class String
        */
       using std::string::operator=;
 
-      /**
-       * \brief Returns type of string: \c "String".
-       */
-      static String getType();
-
       /**
        * \brief Returns the number of characters in given string. Equivalent to \ref getSize.
        */
@@ -368,11 +359,6 @@ String operator+( const char* string1, const String& string2 );
  */
 String operator+( const std::string& string1, const String& string2 );
 
-/**
- * \brief Writes the string \e str to given \e stream
- */
-std::ostream& operator<<( std::ostream& stream, const String& str );
-
 /**
  * \brief Converts \e value of type \e T to a String.
  * 
diff --git a/src/TNL/String.hpp b/src/TNL/String.hpp
index 4cdeee7aceb62e93b27a91655938df494c9e3666..3c38fe6b0ca24a58242461788b2b417a94ab6a1e 100644
--- a/src/TNL/String.hpp
+++ b/src/TNL/String.hpp
@@ -19,11 +19,6 @@
 
 namespace TNL {
 
-inline String String::getType()
-{
-   return String( "String" );
-}
-
 inline int String::getLength() const
 {
    return getSize();
@@ -243,12 +238,6 @@ inline String operator+( const std::string& string1, const String& string2 )
    return String( string1 ) + string2;
 }
 
-inline std::ostream& operator<<( std::ostream& stream, const String& str )
-{
-   stream << str.getString();
-   return stream;
-}
-
 #ifdef HAVE_MPI
 inline void mpiSend( const String& str, int target, int tag, MPI_Comm mpi_comm )
 {
diff --git a/src/TNL/Devices/SystemInfo.h b/src/TNL/SystemInfo.h
similarity index 95%
rename from src/TNL/Devices/SystemInfo.h
rename to src/TNL/SystemInfo.h
index f62321d6f819303ec4c12d174a71305f26792ac1..e64418a7cc9a056a1748006aa8388ff8ebd28421 100644
--- a/src/TNL/Devices/SystemInfo.h
+++ b/src/TNL/SystemInfo.h
@@ -15,7 +15,6 @@
 #include <TNL/String.h>
 
 namespace TNL {
-namespace Devices {
 
 struct CacheSizes {
    int L1instruction = 0;
@@ -68,7 +67,6 @@ protected:
    }
 };
 
-} // namespace Devices
 } // namespace TNL
 
-#include <TNL/Devices/SystemInfo_impl.h>
+#include <TNL/SystemInfo.hpp>
diff --git a/src/TNL/Devices/SystemInfo_impl.h b/src/TNL/SystemInfo.hpp
similarity index 98%
rename from src/TNL/Devices/SystemInfo_impl.h
rename to src/TNL/SystemInfo.hpp
index 0bc42601191aad2fd8a09fa8080a0295ebe6c075..b46234418df9df58daf9ceb836ddd606d8e170a2 100644
--- a/src/TNL/Devices/SystemInfo_impl.h
+++ b/src/TNL/SystemInfo.hpp
@@ -18,10 +18,9 @@
 #include <sys/utsname.h>
 #include <sys/stat.h>
 
-#include <TNL/Devices/SystemInfo.h>
+#include <TNL/SystemInfo.h>
 
 namespace TNL {
-namespace Devices {
 
 inline String
 SystemInfo::getHostname( void )
@@ -215,5 +214,4 @@ SystemInfo::parseCPUInfo( void )
    return info;
 }
 
-} // namespace Devices
 } // namespace TNL
diff --git a/src/TNL/TypeInfo.h b/src/TNL/TypeInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..61377fbb8593ce9659c75dc355ad8abfe0838333
--- /dev/null
+++ b/src/TNL/TypeInfo.h
@@ -0,0 +1,107 @@
+/***************************************************************************
+                          TypeInfo.h  -  description
+                             -------------------
+    begin                : Aug 20, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <typeinfo>
+#include <string>
+
+#if defined( __has_include )
+   #if __has_include(<cxxabi.h>)
+      #define TNL_HAS_CXXABI_H
+   #endif
+#elif defined( __GLIBCXX__ ) || defined( __GLIBCPP__ )
+   #define TNL_HAS_CXXABI_H
+#endif
+
+#if defined( TNL_HAS_CXXABI_H )
+   #include <cxxabi.h>  // abi::__cxa_demangle
+   #include <memory>  // std::unique_ptr
+   #include <cstdlib>  // std::free
+#endif
+
+#include <TNL/TypeTraits.h>
+#include <TNL/String.h>
+
+namespace TNL {
+namespace __getType_impl {
+
+inline std::string
+demangle( const char* name )
+{
+#if defined( TNL_HAS_CXXABI_H )
+   int status = 0;
+   std::size_t size = 0;
+   std::unique_ptr<char[], void (*)(void*)> result(
+      abi::__cxa_demangle( name, NULL, &size, &status ),
+      std::free
+   );
+   if( result.get() )
+      return result.get();
+#endif
+   return name;
+}
+
+} // namespace __getType_impl
+
+/**
+ * \brief Returns a human-readable string representation of given type.
+ *
+ * Note that since we use the \ref typeid operator internally, the top-level
+ * cv-qualifiers are always ignored. See https://stackoverflow.com/a/8889143
+ * for details.
+ */
+template< typename T >
+String getType()
+{
+   return __getType_impl::demangle( typeid(T).name() );
+}
+
+/**
+ * \brief Returns a human-readable string representation of given object's type.
+ *
+ * Note that since we use the \ref typeid operator internally, the top-level
+ * cv-qualifiers are always ignored. See https://stackoverflow.com/a/8889143
+ * for details.
+ */
+template< typename T >
+String getType( T&& obj )
+{
+   return __getType_impl::demangle( typeid(obj).name() );
+}
+
+/**
+ * \brief Returns a string identifying a type for the purpose of serialization.
+ *
+ * By default, this function returns the same string as \ref getType. However,
+ * if a user-defined class has a static \e getSerializationType method, it is
+ * called instead. This is useful for overriding the default \ref typeid name,
+ * which may be necessary e.g. for class templates which should have the same
+ * serialization type for multiple devices.
+ */
+template< typename T,
+          std::enable_if_t< ! HasStaticGetSerializationType< T >::value, bool > = true >
+String getSerializationType()
+{
+   return getType< T >();
+}
+
+/**
+ * \brief Specialization of \ref getSerializationType for types which provide a
+ *        static \e getSerializationType method to override the default behaviour.
+ */
+template< typename T,
+          std::enable_if_t< HasStaticGetSerializationType< T >::value, bool > = true >
+String getSerializationType()
+{
+   return T::getSerializationType();
+}
+
+} // namespace TNL
diff --git a/src/TNL/TypeTraits.h b/src/TNL/TypeTraits.h
index d34f7d39fc223dc9a2b351c3885557357741f656..d617f2b42454bce2fad7a5d2ff69b685574723c9 100644
--- a/src/TNL/TypeTraits.h
+++ b/src/TNL/TypeTraits.h
@@ -76,7 +76,6 @@ public:
     static constexpr bool value = type::value;
 };
 
-
 /**
  * \brief Type trait for checking if T has operator[] taking one index argument.
  */
@@ -183,4 +182,31 @@ struct IsViewType
             std::is_same< typename T::ViewType, T >::value >
 {};
 
+/**
+ * \brief Type trait for checking if T has a static getSerializationType method.
+ */
+template< typename T >
+class HasStaticGetSerializationType
+{
+private:
+   template< typename U >
+   static constexpr auto check(U*)
+   -> typename
+      std::enable_if_t<
+         ! std::is_same<
+               decltype( U::getSerializationType() ),
+               void
+            >::value,
+         std::true_type
+      >;
+
+   template< typename >
+   static constexpr std::false_type check(...);
+
+   using type = decltype(check<T>(0));
+
+public:
+    static constexpr bool value = type::value;
+};
+
 } //namespace TNL
diff --git a/src/TNL/param-types.h b/src/TNL/param-types.h
deleted file mode 100644
index 228b742793243624e4c9c4d611c1e84e2e77c660..0000000000000000000000000000000000000000
--- a/src/TNL/param-types.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/***************************************************************************
-                          param-types.h  -  description
-                             -------------------
-    begin                : 2009/07/29
-    copyright            : (C) 2009 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <vector>
-#include <type_traits>
-
-#include <TNL/Experimental/Arithmetics/Real.h>
-#include <TNL/String.h>
-
-namespace TNL {
-
-namespace __getType_impl {
-
-template< typename T,
-          bool isEnum = std::is_enum< T >::value >
-struct getTypeHelper
-{
-   static String get() { return T::getType(); }
-};
-
-template<> struct getTypeHelper< void,                 false >{ static String get() { return String( "void" ); }; };
-template<> struct getTypeHelper< bool,                 false >{ static String get() { return String( "bool" ); }; };
-
-template<> struct getTypeHelper< char,                 false >{ static String get() { return String( "char" ); }; };
-template<> struct getTypeHelper< short int,            false >{ static String get() { return String( "short int" ); }; };
-template<> struct getTypeHelper< int,                  false >{ static String get() { return String( "int" ); }; };
-template<> struct getTypeHelper< long int,             false >{ static String get() { return String( "long int" ); }; };
-
-template<> struct getTypeHelper< unsigned char,        false >{ static String get() { return String( "unsigned char" ); }; };
-template<> struct getTypeHelper< unsigned short,       false >{ static String get() { return String( "unsigned short" ); }; };
-template<> struct getTypeHelper< unsigned int,         false >{ static String get() { return String( "unsigned int" ); }; };
-template<> struct getTypeHelper< unsigned long,        false >{ static String get() { return String( "unsigned long" ); }; };
-
-template<> struct getTypeHelper< signed char,          false >{ static String get() { return String( "signed char" ); }; };
-
-template<> struct getTypeHelper< float,                false >{ static String get() { return String( "float" ); }; };
-template<> struct getTypeHelper< double,               false >{ static String get() { return String( "double" ); }; };
-template<> struct getTypeHelper< long double,          false >{ static String get() { return String( "long double" ); }; };
-template<> struct getTypeHelper< tnlFloat,             false >{ static String get() { return String( "tnlFloat" ); }; };
-template<> struct getTypeHelper< tnlDouble,            false >{ static String get() { return String( "tnlDouble" ); }; };
-
-// const specializations
-template<> struct getTypeHelper< const void,           false >{ static String get() { return String( "const void" ); }; };
-template<> struct getTypeHelper< const bool,           false >{ static String get() { return String( "const bool" ); }; };
-
-template<> struct getTypeHelper< const char,           false >{ static String get() { return String( "const char" ); }; };
-template<> struct getTypeHelper< const short int,      false >{ static String get() { return String( "const short int" ); }; };
-template<> struct getTypeHelper< const int,            false >{ static String get() { return String( "const int" ); }; };
-template<> struct getTypeHelper< const long int,       false >{ static String get() { return String( "const long int" ); }; };
-
-template<> struct getTypeHelper< const unsigned char,  false >{ static String get() { return String( "const unsigned char" ); }; };
-template<> struct getTypeHelper< const unsigned short, false >{ static String get() { return String( "const unsigned short" ); }; };
-template<> struct getTypeHelper< const unsigned int,   false >{ static String get() { return String( "const unsigned int" ); }; };
-template<> struct getTypeHelper< const unsigned long,  false >{ static String get() { return String( "const unsigned long" ); }; };
-
-template<> struct getTypeHelper< const signed char,    false >{ static String get() { return String( "const signed char" ); }; };
-
-template<> struct getTypeHelper< const float,          false >{ static String get() { return String( "const float" ); }; };
-template<> struct getTypeHelper< const double,         false >{ static String get() { return String( "const double" ); }; };
-template<> struct getTypeHelper< const long double,    false >{ static String get() { return String( "const long double" ); }; };
-template<> struct getTypeHelper< const tnlFloat,       false >{ static String get() { return String( "const tnlFloat" ); }; };
-template<> struct getTypeHelper< const tnlDouble,      false >{ static String get() { return String( "const tnlDouble" ); }; };
-
-template< typename T >
-struct getTypeHelper< T, true >
-{
-   static String get() { return getTypeHelper< typename std::underlying_type< T >::type, false >::get(); };
-};
-
-// wrappers for STL containers
-template< typename T >
-struct getTypeHelper< std::vector< T >, false >
-{
-   static String get() { return String( "std::vector< " ) + getTypeHelper< T >::get() + " >"; }
-};
-
-} // namespace __getType_impl
-
-template< typename T >
-String getType() { return __getType_impl::getTypeHelper< T >::get(); }
-
-} // namespace TNL
diff --git a/src/Tools/tnl-dicom-reader.cpp b/src/Tools/tnl-dicom-reader.cpp
index f6931e5f47f21b7ce404e6bc90c384b2aeb749d7..c0f770e497b95ba2758cb852a36a3c3ccf562069 100644
--- a/src/Tools/tnl-dicom-reader.cpp
+++ b/src/Tools/tnl-dicom-reader.cpp
@@ -37,7 +37,7 @@ bool processDicomFiles( const Config::ParameterContainer& parameters )
 
 bool processDicomSeries( const Config::ParameterContainer& parameters )
 {
-   const Containers::List< String >& dicomSeriesNames = parameters.getParameter< Containers::List< String > >( "dicom-series" );
+   const std::vector< String >& dicomSeriesNames = parameters.getParameter< std::vector< String > >( "dicom-series" );
    String meshFile = parameters.getParameter< String >( "mesh-file" );
    bool verbose = parameters.getParameter< bool >( "verbose" );
 
@@ -45,7 +45,7 @@ bool processDicomSeries( const Config::ParameterContainer& parameters )
    GridType grid;
    Containers::Vector< double, Devices::Host, int > vector;
    Images::RegionOfInterest< int > roi;
-   for( int i = 0; i < dicomSeriesNames.getSize(); i++ )
+   for( std::size_t i = 0; i < dicomSeriesNames.size(); i++ )
    {
       const String& seriesName = dicomSeriesNames[ i ];
       std::cout << "Reading a file " << seriesName << std::endl;
diff --git a/src/Tools/tnl-lattice-init.h b/src/Tools/tnl-lattice-init.h
index 203054f58e4afc34323e2fed4ddec25f511cdc00..71a09636c5c5bec19c10e71ce5869bd2a790f9a2 100644
--- a/src/Tools/tnl-lattice-init.h
+++ b/src/Tools/tnl-lattice-init.h
@@ -246,9 +246,9 @@ bool resolveProfileReal( const Config::ParameterContainer& parameters )
       std::cerr << "MeshFunction is required in profile file " << profileFile << "." << std::endl;
       return false;
    }
-   if( parsedMeshFunctionType[ 1 ] != ProfileMesh::getType() )
+   if( parsedMeshFunctionType[ 1 ] != getType< ProfileMesh >() )
    {
-      std::cerr << "The mesh function in the profile file must be defined on " << ProfileMesh::getType() 
+      std::cerr << "The mesh function in the profile file must be defined on " << getType< ProfileMesh >()
                 << " but it is defined on " << parsedMeshFunctionType[ 1 ] << "." << std::endl;
       return false;
    }
diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization.h.in
index e67c5e007a02f45bce36eb079ae529c27f7dce0f..89146c200933c51e79eac204d4862dd6c0a073c5 100644
--- a/src/Tools/tnl-quickstart/operator-grid-specialization.h.in
+++ b/src/Tools/tnl-quickstart/operator-grid-specialization.h.in
@@ -14,8 +14,6 @@ class {operatorName}< TNL::Meshes::Grid< {meshDimension}, MeshReal, Device, Mesh
       typedef TNL::Functions::MeshFunction< MeshType > MeshFunctionType;
       enum {{ Dimension = MeshType::getMeshDimension() }};
 
-      static TNL::String getType();
-
       template< typename MeshFunction, typename MeshEntity >
       __cuda_callable__
       Real operator()( const MeshFunction& u,
diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in
index da4da6d635d10d1681d689972d5e93695e47b4dd..ed00005bcff2c159df519fc9493def8b18a238e7 100644
--- a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in
+++ b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in
@@ -1,21 +1,6 @@
 /****
  * {meshDimension}D problem
  */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-TNL::String
-{operatorName}< TNL::Meshes::Grid< {meshDimension}, MeshReal, Device, MeshIndex >, Real, Index >::
-getType()
-{{
-   return TNL::String( "{operatorName}< " ) +
-          MeshType::getType() + ", " +
-          TNL::getType< Real >() + ", " +
-          TNL::getType< Index >() + " >";
-}}
-
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/src/Tools/tnl-quickstart/problem.h.in b/src/Tools/tnl-quickstart/problem.h.in
index 9006f7cf7c5f1e39e5b59333fe888e5d56b479d5..d72120c1fe54dc34d06f7c49832dee3214772ff7 100644
--- a/src/Tools/tnl-quickstart/problem.h.in
+++ b/src/Tools/tnl-quickstart/problem.h.in
@@ -38,8 +38,6 @@ class {problemBaseName}Problem:
 
       using CommunicatorType = Communicator;
 
-      static TNL::String getTypeStatic();
-
       TNL::String getPrologHeader() const;
 
       void writeProlog( TNL::Logger& logger,
diff --git a/src/Tools/tnl-quickstart/problem_impl.h.in b/src/Tools/tnl-quickstart/problem_impl.h.in
index f196ebcec1922b51539ca2f5794ba8b8324be368..3e72e4db125e0b9f6f17628f07539c218ff10907 100644
--- a/src/Tools/tnl-quickstart/problem_impl.h.in
+++ b/src/Tools/tnl-quickstart/problem_impl.h.in
@@ -7,18 +7,6 @@
 #include <TNL/Solvers/PDE/BoundaryConditionsSetter.h>
 #include <TNL/Solvers/PDE/BackwardTimeDiscretisation.h>
 
-template< typename Mesh,
-          typename Communicator,
-          typename BoundaryCondition,
-          typename RightHandSide,
-          typename DifferentialOperator >
-TNL::String
-{problemBaseName}Problem< Mesh, Communicator, BoundaryCondition, RightHandSide, DifferentialOperator >::
-getTypeStatic()
-{{
-   return TNL::String( "{problemBaseName}Problem< " ) + Mesh :: getTypeStatic() + " >";
-}}
-
 template< typename Mesh,
           typename Communicator,
           typename BoundaryCondition,
diff --git a/src/Tools/tnl-view.h b/src/Tools/tnl-view.h
index cd7cd93b9a2f0729cd8e9fd1e3628e6a5e58f6dd..7e7b82bbfc8165245fad06e1e97dad072e621442 100644
--- a/src/Tools/tnl-view.h
+++ b/src/Tools/tnl-view.h
@@ -52,7 +52,7 @@ bool writeMeshFunction( const typename MeshFunction::MeshPointer& meshPointer,
 {
 
    MeshFunction function( meshPointer );
-   std::cout << "Mesh function: " << function.getType() << std::endl;
+   std::cout << "Mesh function: " << getType( function ) << std::endl;
    try
    {
       function.load( inputFileName );
@@ -84,7 +84,7 @@ bool writeVectorField( const typename VectorField::FunctionType::MeshPointer& me
 {
 
    VectorField field( meshPointer );
-   std::cout << "VectorField: " << field.getType() << std::endl;
+   std::cout << "VectorField: " << getType( field ) << std::endl;
    try
    {
       field.load( inputFileName );
diff --git a/src/UnitTests/Algorithms/CMakeLists.txt b/src/UnitTests/Algorithms/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6870bc84e402f924e48f24f7e95fe8d52dac9434
--- /dev/null
+++ b/src/UnitTests/Algorithms/CMakeLists.txt
@@ -0,0 +1,29 @@
+IF( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} )
+ELSE( BUILD_CUDA )
+   ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp )
+   TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp )
+   TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp )
+   TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} )
+ENDIF( BUILD_CUDA )
+
+
+ADD_TEST( MemoryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/MemoryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Algorithms/MemoryOperationsTest.cpp b/src/UnitTests/Algorithms/MemoryOperationsTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..976447eef2447bf0fee4f7580f106ae561aa9168
--- /dev/null
+++ b/src/UnitTests/Algorithms/MemoryOperationsTest.cpp
@@ -0,0 +1 @@
+#include "MemoryOperationsTest.h"
diff --git a/src/UnitTests/Algorithms/MemoryOperationsTest.cu b/src/UnitTests/Algorithms/MemoryOperationsTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..976447eef2447bf0fee4f7580f106ae561aa9168
--- /dev/null
+++ b/src/UnitTests/Algorithms/MemoryOperationsTest.cu
@@ -0,0 +1 @@
+#include "MemoryOperationsTest.h"
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Algorithms/MemoryOperationsTest.h
similarity index 57%
rename from src/UnitTests/Containers/ArrayOperationsTest.h
rename to src/UnitTests/Algorithms/MemoryOperationsTest.h
index 4a48261be0401eed2231a007d7e68dfed711cb2b..ebfb01f1bf62144d2ff950c4d3265cc7474dab3b 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Algorithms/MemoryOperationsTest.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          ArrayOperationsTest.h  -  description
+                          MemoryOperationsTest.h  -  description
                              -------------------
     begin                : Jul 15, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -13,19 +13,19 @@
 #ifdef HAVE_GTEST
 #include <TNL/Allocators/Host.h>
 #include <TNL/Allocators/Cuda.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Algorithms/MemoryOperations.h>
+#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 
 #include "gtest/gtest.h"
 
 using namespace TNL;
-using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 constexpr int ARRAY_TEST_SIZE = 5000;
 
 // test fixture for typed tests
 template< typename Value >
-class ArrayOperationsTest : public ::testing::Test
+class MemoryOperationsTest : public ::testing::Test
 {
 protected:
    using ValueType = Value;
@@ -34,9 +34,9 @@ protected:
 // types for which ArrayTest is instantiated
 using ValueTypes = ::testing::Types< short int, int, long, float, double >;
 
-TYPED_TEST_SUITE( ArrayOperationsTest, ValueTypes );
+TYPED_TEST_SUITE( MemoryOperationsTest, ValueTypes );
 
-TYPED_TEST( ArrayOperationsTest, allocateMemory_host )
+TYPED_TEST( MemoryOperationsTest, allocateMemory_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
@@ -48,7 +48,7 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_host )
    allocator.deallocate( data, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, setElement_host )
+TYPED_TEST( MemoryOperationsTest, setElement_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
@@ -56,27 +56,27 @@ TYPED_TEST( ArrayOperationsTest, setElement_host )
    Allocator allocator;
    ValueType* data = allocator.allocate( ARRAY_TEST_SIZE );
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) {
-      ArrayOperations< Devices::Host >::setElement( data + i, (ValueType) i );
+      MemoryOperations< Devices::Host >::setElement( data + i, (ValueType) i );
       EXPECT_EQ( data[ i ], i );
-      EXPECT_EQ( ArrayOperations< Devices::Host >::getElement( data + i ), i );
+      EXPECT_EQ( MemoryOperations< Devices::Host >::getElement( data + i ), i );
    }
    allocator.deallocate( data, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, set_host )
+TYPED_TEST( MemoryOperationsTest, set_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
 
    Allocator allocator;
    ValueType* data = allocator.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data, (ValueType) 13, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( data, (ValueType) 13, ARRAY_TEST_SIZE );
    for( int i = 0; i < ARRAY_TEST_SIZE; i ++ )
       EXPECT_EQ( data[ i ], 13 );
    allocator.deallocate( data, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, copy_host )
+TYPED_TEST( MemoryOperationsTest, copy_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
@@ -84,15 +84,15 @@ TYPED_TEST( ArrayOperationsTest, copy_host )
    Allocator allocator;
    ValueType* data1 = allocator.allocate( ARRAY_TEST_SIZE );
    ValueType* data2 = allocator.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data1, (ValueType) 13, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::copy< ValueType, ValueType >( data2, data1, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( data1, (ValueType) 13, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::copy< ValueType, ValueType >( data2, data1, ARRAY_TEST_SIZE );
    for( int i = 0; i < ARRAY_TEST_SIZE; i ++ )
       EXPECT_EQ( data1[ i ], data2[ i ]);
    allocator.deallocate( data1, ARRAY_TEST_SIZE );
    allocator.deallocate( data2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, copyWithConversion_host )
+TYPED_TEST( MemoryOperationsTest, copyWithConversion_host )
 {
    using Allocator1 = Allocators::Host< int >;
    using Allocator2 = Allocators::Host< float >;
@@ -101,15 +101,15 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversion_host )
    Allocator2 allocator2;
    int* data1 = allocator1.allocate( ARRAY_TEST_SIZE );
    float* data2 = allocator2.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data1, 13, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::copy< float, int >( data2, data1, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( data1, 13, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::copy< float, int >( data2, data1, ARRAY_TEST_SIZE );
    for( int i = 0; i < ARRAY_TEST_SIZE; i ++ )
       EXPECT_EQ( data1[ i ], data2[ i ] );
    allocator1.deallocate( data1, ARRAY_TEST_SIZE );
    allocator2.deallocate( data2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, compare_host )
+TYPED_TEST( MemoryOperationsTest, compare_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
@@ -117,16 +117,16 @@ TYPED_TEST( ArrayOperationsTest, compare_host )
    Allocator allocator;
    ValueType* data1 = allocator.allocate( ARRAY_TEST_SIZE );
    ValueType* data2 = allocator.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data1, (ValueType) 7, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data2, (ValueType) 0, ARRAY_TEST_SIZE );
-   EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) );
-   ArrayOperations< Devices::Host >::set( data2, (ValueType) 7, ARRAY_TEST_SIZE );
-   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) );
+   MemoryOperations< Devices::Host >::set( data1, (ValueType) 7, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( data2, (ValueType) 0, ARRAY_TEST_SIZE );
+   EXPECT_FALSE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) );
+   MemoryOperations< Devices::Host >::set( data2, (ValueType) 7, ARRAY_TEST_SIZE );
+   EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) );
    allocator.deallocate( data1, ARRAY_TEST_SIZE );
    allocator.deallocate( data2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, compareWithConversion_host )
+TYPED_TEST( MemoryOperationsTest, compareWithConversion_host )
 {
    using Allocator1 = Allocators::Host< int >;
    using Allocator2 = Allocators::Host< float >;
@@ -135,16 +135,16 @@ TYPED_TEST( ArrayOperationsTest, compareWithConversion_host )
    Allocator2 allocator2;
    int* data1 = allocator1.allocate( ARRAY_TEST_SIZE );
    float* data2 = allocator2.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data1, 7, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( data2, (float) 0.0, ARRAY_TEST_SIZE );
-   EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) );
-   ArrayOperations< Devices::Host >::set( data2, (float) 7.0, ARRAY_TEST_SIZE );
-   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) );
+   MemoryOperations< Devices::Host >::set( data1, 7, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( data2, (float) 0.0, ARRAY_TEST_SIZE );
+   EXPECT_FALSE( ( MemoryOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) );
+   MemoryOperations< Devices::Host >::set( data2, (float) 7.0, ARRAY_TEST_SIZE );
+   EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) );
    allocator1.deallocate( data1, ARRAY_TEST_SIZE );
    allocator2.deallocate( data2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, containsValue_host )
+TYPED_TEST( MemoryOperationsTest, containsValue_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
@@ -155,14 +155,14 @@ TYPED_TEST( ArrayOperationsTest, containsValue_host )
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       data[ i ] = i % 10;
    for( int i = 0; i < 10; i++ )
-      EXPECT_TRUE( ( ArrayOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) );
+      EXPECT_TRUE( ( MemoryOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) );
    for( int i = 10; i < 20; i++ )
-      EXPECT_FALSE( ( ArrayOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) );
+      EXPECT_FALSE( ( MemoryOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) );
 
    allocator.deallocate( data, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host )
+TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Host< ValueType >;
@@ -173,18 +173,18 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host )
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       data[ i ] = i % 10;
    for( int i = 0; i < 20; i++ )
-      EXPECT_FALSE( ( ArrayOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) );
+      EXPECT_FALSE( ( MemoryOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) );
 
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       data[ i ] = 10;
-   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) 10 ) ) );
+   EXPECT_TRUE( ( MemoryOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) 10 ) ) );
 
    allocator.deallocate( data, ARRAY_TEST_SIZE );
 }
 
 
 #ifdef HAVE_CUDA
-TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda )
+TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Cuda< ValueType >;
@@ -198,7 +198,7 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda )
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 }
 
-TYPED_TEST( ArrayOperationsTest, setElement_cuda )
+TYPED_TEST( MemoryOperationsTest, setElement_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using Allocator = Allocators::Cuda< ValueType >;
@@ -208,21 +208,21 @@ TYPED_TEST( ArrayOperationsTest, setElement_cuda )
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
-      ArrayOperations< Devices::Cuda >::setElement( &data[ i ], (ValueType) i );
+      MemoryOperations< Devices::Cuda >::setElement( &data[ i ], (ValueType) i );
 
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
    {
       ValueType d;
       ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( ValueType ), cudaMemcpyDeviceToHost ), cudaSuccess );
       EXPECT_EQ( d, i );
-      EXPECT_EQ( ArrayOperations< Devices::Cuda >::getElement( &data[ i ] ), i );
+      EXPECT_EQ( MemoryOperations< Devices::Cuda >::getElement( &data[ i ] ), i );
    }
 
    allocator.deallocate( data, ARRAY_TEST_SIZE );
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 }
 
-TYPED_TEST( ArrayOperationsTest, set_cuda )
+TYPED_TEST( MemoryOperationsTest, set_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using HostAllocator = Allocators::Host< ValueType >;
@@ -232,10 +232,10 @@ TYPED_TEST( ArrayOperationsTest, set_cuda )
    CudaAllocator cudaAllocator;
    ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
    ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE );
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE );
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       EXPECT_EQ( hostData[ i ], 13 );
@@ -243,7 +243,7 @@ TYPED_TEST( ArrayOperationsTest, set_cuda )
    cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, copy_cuda )
+TYPED_TEST( MemoryOperationsTest, copy_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using HostAllocator = Allocators::Host< ValueType >;
@@ -255,18 +255,18 @@ TYPED_TEST( ArrayOperationsTest, copy_cuda )
    ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE );
    ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE );
    ValueType* deviceData2 = cudaAllocator.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE );
-   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) );
+   MemoryOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE );
+   EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) );
    hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
    hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE );
    cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
    cudaAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda )
+TYPED_TEST( MemoryOperationsTest, copyWithConversions_cuda )
 {
    using HostAllocator1 = Allocators::Host< int >;
    using HostAllocator2 = Allocators::Host< double >;
@@ -281,10 +281,10 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda )
    double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE );
    long* deviceData = cudaAllocator1.allocate( ARRAY_TEST_SIZE );
    float* deviceData2 = cudaAllocator2.allocate( ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE );
    for( int i = 0; i < ARRAY_TEST_SIZE; i ++ )
       EXPECT_EQ( hostData[ i ], hostData2[ i ] );
    hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE );
@@ -293,7 +293,7 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda )
    cudaAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, compare_cuda )
+TYPED_TEST( MemoryOperationsTest, compare_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using HostAllocator = Allocators::Host< ValueType >;
@@ -305,25 +305,25 @@ TYPED_TEST( ArrayOperationsTest, compare_cuda )
    ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE );
    ValueType* deviceData2 = cudaAllocator.allocate( ARRAY_TEST_SIZE );
 
-   ArrayOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE );
-   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
+   MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE );
+   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
+   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
+   EXPECT_FALSE(( MemoryOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
 
-   ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE );
-   EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
-   EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
-   EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
+   MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE );
+   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
+   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
+   EXPECT_TRUE(( MemoryOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
 
    hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
    cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
    cudaAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, compareWithConversions_cuda )
+TYPED_TEST( MemoryOperationsTest, compareWithConversions_cuda )
 {
    using HostAllocator = Allocators::Host< int >;
    using CudaAllocator1 = Allocators::Cuda< float >;
@@ -336,25 +336,25 @@ TYPED_TEST( ArrayOperationsTest, compareWithConversions_cuda )
    float* deviceData = cudaAllocator1.allocate( ARRAY_TEST_SIZE );
    double* deviceData2 = cudaAllocator2.allocate( ARRAY_TEST_SIZE );
 
-   ArrayOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData, (float) 8, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE );
-   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
+   MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData, (float) 8, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE );
+   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
+   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
+   EXPECT_FALSE(( MemoryOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
 
-   ArrayOperations< Devices::Cuda >::set( deviceData, (float) 7, ARRAY_TEST_SIZE );
-   ArrayOperations< Devices::Cuda >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE );
-   EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
-   EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
-   EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
+   MemoryOperations< Devices::Cuda >::set( deviceData, (float) 7, ARRAY_TEST_SIZE );
+   MemoryOperations< Devices::Cuda >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE );
+   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
+   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
+   EXPECT_TRUE(( MemoryOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));
 
    hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
    cudaAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE );
    cudaAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, containsValue_cuda )
+TYPED_TEST( MemoryOperationsTest, containsValue_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using HostAllocator = Allocators::Host< ValueType >;
@@ -367,18 +367,18 @@ TYPED_TEST( ArrayOperationsTest, containsValue_cuda )
 
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       hostData[ i ] = i % 10;
-   ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );
 
    for( int i = 0; i < 10; i++ )
-      EXPECT_TRUE( ( ArrayOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
+      EXPECT_TRUE( ( MemoryOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
    for( int i = 10; i < 20; i++ )
-      EXPECT_FALSE( ( ArrayOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
+      EXPECT_FALSE( ( MemoryOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
 
    hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
    cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
 }
 
-TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda )
+TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda )
 {
    using ValueType = typename TestFixture::ValueType;
    using HostAllocator = Allocators::Host< ValueType >;
@@ -391,16 +391,16 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda )
 
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       hostData[ i ] = i % 10;
-   ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );
 
    for( int i = 0; i < 20; i++ )
-      EXPECT_FALSE( ( ArrayOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
+      EXPECT_FALSE( ( MemoryOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
 
    for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
       hostData[ i ] = 10;
-   ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );
+   MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );
 
-   EXPECT_TRUE( ( ArrayOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) );
+   EXPECT_TRUE( ( MemoryOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) );
 
    hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
    cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
diff --git a/src/UnitTests/Containers/MultireductionTest.cpp b/src/UnitTests/Algorithms/MultireductionTest.cpp
similarity index 100%
rename from src/UnitTests/Containers/MultireductionTest.cpp
rename to src/UnitTests/Algorithms/MultireductionTest.cpp
diff --git a/src/UnitTests/Containers/MultireductionTest.cu b/src/UnitTests/Algorithms/MultireductionTest.cu
similarity index 100%
rename from src/UnitTests/Containers/MultireductionTest.cu
rename to src/UnitTests/Algorithms/MultireductionTest.cu
diff --git a/src/UnitTests/Containers/MultireductionTest.h b/src/UnitTests/Algorithms/MultireductionTest.h
similarity index 92%
rename from src/UnitTests/Containers/MultireductionTest.h
rename to src/UnitTests/Algorithms/MultireductionTest.h
index 7a321f5836cb2e0b2737c6176eb0b23794c4a501..ec674d935d579ed76b6ba4afc30b1343ca017c51 100644
--- a/src/UnitTests/Containers/MultireductionTest.h
+++ b/src/UnitTests/Algorithms/MultireductionTest.h
@@ -15,11 +15,11 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
-#include <TNL/Containers/Algorithms/Multireduction.h>
+#include <TNL/Algorithms/Multireduction.h>
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 
 template< typename View >
 void setLinearSequence( View& deviceVector )
@@ -50,8 +50,8 @@ class MultireductionTest : public ::testing::Test
 protected:
    using DeviceVector = Vector;
    using DeviceView = VectorView< typename Vector::RealType, typename Vector::DeviceType, typename Vector::IndexType >;
-   using HostVector = typename DeviceVector::HostType;
-   using HostView = typename DeviceView::HostType;
+   using HostVector = typename DeviceVector::template Self< typename DeviceVector::RealType, Devices::Sequential >;
+   using HostView = typename DeviceView::template Self< typename DeviceView::RealType, Devices::Sequential >;
 
    // should be small enough to have fast tests, but larger than minGPUReductionDataSize
    // and large enough to require multiple CUDA blocks for reduction
diff --git a/src/UnitTests/ParallelForTest.cpp b/src/UnitTests/Algorithms/ParallelForTest.cpp
similarity index 100%
rename from src/UnitTests/ParallelForTest.cpp
rename to src/UnitTests/Algorithms/ParallelForTest.cpp
diff --git a/src/UnitTests/ParallelForTest.cu b/src/UnitTests/Algorithms/ParallelForTest.cu
similarity index 100%
rename from src/UnitTests/ParallelForTest.cu
rename to src/UnitTests/Algorithms/ParallelForTest.cu
diff --git a/src/UnitTests/ParallelForTest.h b/src/UnitTests/Algorithms/ParallelForTest.h
similarity index 86%
rename from src/UnitTests/ParallelForTest.h
rename to src/UnitTests/Algorithms/ParallelForTest.h
index 95455286e796f536215166c30c8173d52a14e785..aa75fd56093df72bb83b126fde7b3f77e363aa66 100644
--- a/src/UnitTests/ParallelForTest.h
+++ b/src/UnitTests/Algorithms/ParallelForTest.h
@@ -11,7 +11,7 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Array.h>
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
@@ -38,7 +38,7 @@ TEST( ParallelForTest, 1D_host )
       {
          view[i] = i;
       };
-      ParallelFor< Devices::Host >::exec( 0, size, kernel );
+      Algorithms::ParallelFor< Devices::Host >::exec( 0, size, kernel );
 
       if( a != expected ) {
          for (int i = 0; i < size; i++)
@@ -65,7 +65,7 @@ TEST( ParallelForTest, 2D_host )
       {
          view[i] = i;
       };
-      ParallelFor2D< Devices::Host >::exec( 0, 0, size, 1, kernel1 );
+      Algorithms::ParallelFor2D< Devices::Host >::exec( 0, 0, size, 1, kernel1 );
 
       if( a != expected ) {
          for (int i = 0; i < size; i++)
@@ -77,7 +77,7 @@ TEST( ParallelForTest, 2D_host )
       {
          view[j] = j;
       };
-      ParallelFor2D< Devices::Host >::exec( 0, 0, 1, size, kernel2 );
+      Algorithms::ParallelFor2D< Devices::Host >::exec( 0, 0, 1, size, kernel2 );
 
       if( a != expected ) {
          for (int i = 0; i < size; i++)
@@ -104,7 +104,7 @@ TEST( ParallelForTest, 3D_host )
       {
          view[i] = i;
       };
-      ParallelFor3D< Devices::Host >::exec( 0, 0, 0, size, 1, 1, kernel1 );
+      Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, size, 1, 1, kernel1 );
 
       if( a != expected ) {
          for (int i = 0; i < size; i++)
@@ -116,7 +116,7 @@ TEST( ParallelForTest, 3D_host )
       {
          view[j] = j;
       };
-      ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, size, 1, kernel2 );
+      Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, size, 1, kernel2 );
 
       if( a != expected ) {
          for (int i = 0; i < size; i++)
@@ -128,7 +128,7 @@ TEST( ParallelForTest, 3D_host )
       {
          view[k] = k;
       };
-      ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, 1, size, kernel3 );
+      Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, 1, size, kernel3 );
 
       if( a != expected ) {
          for (int i = 0; i < size; i++)
@@ -158,7 +158,7 @@ void test_1D_cuda()
       {
          view[i] = i;
       };
-      ParallelFor< Devices::Cuda >::exec( 0, size, kernel );
+      Algorithms::ParallelFor< Devices::Cuda >::exec( 0, size, kernel );
 
       ArrayHost ah;
       ah = a;
@@ -194,7 +194,7 @@ void test_2D_cuda()
       {
          view[i] = i;
       };
-      ParallelFor2D< Devices::Cuda >::exec( 0, 0, size, 1, kernel1 );
+      Algorithms::ParallelFor2D< Devices::Cuda >::exec( 0, 0, size, 1, kernel1 );
 
       ArrayHost ah;
       ah = a;
@@ -208,7 +208,7 @@ void test_2D_cuda()
       {
          view[j] = j;
       };
-      ParallelFor2D< Devices::Cuda >::exec( 0, 0, 1, size, kernel2 );
+      Algorithms::ParallelFor2D< Devices::Cuda >::exec( 0, 0, 1, size, kernel2 );
 
       ah = a;
       if( ah != expected ) {
@@ -243,7 +243,7 @@ void test_3D_cuda()
       {
          view[i] = i;
       };
-      ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, size, 1, 1, kernel1 );
+      Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, size, 1, 1, kernel1 );
 
       ArrayHost ah;
       ah = a;
@@ -257,7 +257,7 @@ void test_3D_cuda()
       {
          view[j] = j;
       };
-      ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, size, 1, kernel2 );
+      Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, size, 1, kernel2 );
 
       ah = a;
       if( ah != expected ) {
@@ -270,7 +270,7 @@ void test_3D_cuda()
       {
          view[k] = k;
       };
-      ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, 1, size, kernel3 );
+      Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, 1, size, kernel3 );
 
       ah = a;
       if( ah != expected ) {
@@ -287,4 +287,4 @@ TEST( ParallelForTest, 3D_cuda )
 #endif
 #endif
 
-#include "main.h"
+#include "../main.h"
diff --git a/src/UnitTests/AllocatorsTest.h b/src/UnitTests/AllocatorsTest.h
index 5434a495085d38ad345f0ece4f530aba2184a6ba..16438e082be4c8cf380f7bd2e2e935c0761e2c2f 100644
--- a/src/UnitTests/AllocatorsTest.h
+++ b/src/UnitTests/AllocatorsTest.h
@@ -15,7 +15,7 @@
 #include <TNL/Allocators/Cuda.h>
 #include <TNL/Allocators/CudaHost.h>
 #include <TNL/Allocators/CudaManaged.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Algorithms/MemoryOperations.h>
 
 #include "gtest/gtest.h"
 
@@ -83,7 +83,7 @@ TYPED_TEST( AllocatorsTest, CudaManaged )
    ASSERT_NE( data, nullptr );
 
    // set data on the device
-   Containers::Algorithms::ArrayOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE );
+   Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE );
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 
    // check values on the host
@@ -103,7 +103,7 @@ TYPED_TEST( AllocatorsTest, Cuda )
    ASSERT_NE( data, nullptr );
 
    // set data on the device
-   Containers::Algorithms::ArrayOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE );
+   Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE );
    ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 
    allocator.deallocate( data, ARRAY_TEST_SIZE );
diff --git a/src/UnitTests/AssertCudaTest.cu b/src/UnitTests/AssertCudaTest.cu
index 9d4865eb9c8ba3b7aaa6f8bf5506fabe17be4483..8f42da6772dc2b4b6dbe3f185ca96bc8efde9290 100644
--- a/src/UnitTests/AssertCudaTest.cu
+++ b/src/UnitTests/AssertCudaTest.cu
@@ -13,7 +13,7 @@
 #endif
 
 #include <TNL/Assert.h>
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Cuda/CheckDevice.h>
 #include <TNL/Exceptions/CudaRuntimeError.h>
 
 #ifdef HAVE_GTEST
diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt
index a9fdeab528afa901f91eb5dbd23a35ed1bf018ef..6bfae47e1604e174272b19c1a0e5cfbe109c7dcc 100644
--- a/src/UnitTests/CMakeLists.txt
+++ b/src/UnitTests/CMakeLists.txt
@@ -44,19 +44,14 @@ ADD_EXECUTABLE( ObjectTest ObjectTest.cpp )
 TARGET_COMPILE_OPTIONS( ObjectTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( ObjectTest ${GTEST_BOTH_LIBRARIES} )
 
-if( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} )
-else()
-   ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp )
-   TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} )
-endif()
-
 ADD_EXECUTABLE( TimerTest TimerTest.cpp )
 TARGET_COMPILE_OPTIONS( TimerTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( TimerTest ${GTEST_BOTH_LIBRARIES} )
 
+ADD_EXECUTABLE( TypeInfoTest TypeInfoTest.cpp )
+TARGET_COMPILE_OPTIONS( TypeInfoTest PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( TypeInfoTest ${GTEST_BOTH_LIBRARIES} )
+
 ADD_TEST( AssertTest ${EXECUTABLE_OUTPUT_PATH}/AssertTest${CMAKE_EXECUTABLE_SUFFIX} )
 if( BUILD_CUDA )
    ADD_TEST( AssertCudaTest ${EXECUTABLE_OUTPUT_PATH}/AssertCudaTest${CMAKE_EXECUTABLE_SUFFIX} )
@@ -65,5 +60,5 @@ ADD_TEST( AllocatorsTest ${EXECUTABLE_OUTPUT_PATH}/AllocatorsTest${CMAKE_EXECUTA
 ADD_TEST( FileTest ${EXECUTABLE_OUTPUT_PATH}/FileTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StringTest ${EXECUTABLE_OUTPUT_PATH}/StringTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ObjectTest ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TimerTest ${EXECUTABLE_OUTPUT_PATH}/TimerTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( TypeInfoTest ${EXECUTABLE_OUTPUT_PATH}/TypeInfoTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cpp b/src/UnitTests/Containers/ArrayOperationsTest.cpp
deleted file mode 100644
index c499a61b2cb1b50eebf9fc7fedacf56b9c7cb68a..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/ArrayOperationsTest.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsTest.cpp  -  description
-                             -------------------
-    begin                : Jul 15, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include "ArrayOperationsTest.h"
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cu b/src/UnitTests/Containers/ArrayOperationsTest.cu
deleted file mode 100644
index 497b40f178e5030838f4d871edfb1bed61a14fde..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/ArrayOperationsTest.cu
+++ /dev/null
@@ -1,11 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsTest.cu  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include "ArrayOperationsTest.h"
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index a18471a4a8c24447b48fe673627471aa28e639f6..ef3119365f50444d89154e487a65a12464849062 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -15,6 +15,7 @@
 
 #include <TNL/Containers/Array.h>
 #include <TNL/Containers/Vector.h>
+#include <TNL/Pointers/DevicePointer.h>
 
 #include "gtest/gtest.h"
 
@@ -45,11 +46,6 @@ struct MyData
    // operator used in tests, not necessary for Array to work
    template< typename T >
    bool operator==( T v ) const { return data == v; }
-
-   static String getType()
-   {
-      return String( "MyData" );
-   }
 };
 
 std::ostream& operator<<( std::ostream& str, const MyData& v )
@@ -101,27 +97,6 @@ using ArrayTypes = ::testing::Types<
    Array< float,  Devices::Cuda, long >,
    Array< double, Devices::Cuda, long >,
    Array< MyData, Devices::Cuda, long >
-#endif
-#ifdef HAVE_MIC
-   ,
-   Array< int,    Devices::MIC, short >,
-   Array< long,   Devices::MIC, short >,
-   Array< float,  Devices::MIC, short >,
-   Array< double, Devices::MIC, short >,
-   // TODO: MyData does not work on MIC
-//   Array< MyData, Devices::MIC, short >,
-   Array< int,    Devices::MIC, int >,
-   Array< long,   Devices::MIC, int >,
-   Array< float,  Devices::MIC, int >,
-   Array< double, Devices::MIC, int >,
-   // TODO: MyData does not work on MIC
-//   Array< MyData, Devices::MIC, int >,
-   Array< int,    Devices::MIC, long >,
-   Array< long,   Devices::MIC, long >,
-   Array< float,  Devices::MIC, long >,
-   Array< double, Devices::MIC, long >
-   // TODO: MyData does not work on MIC
-//   Array< MyData, Devices::MIC, long >
 #endif
 
    // all array tests should also work with Vector
@@ -136,11 +111,6 @@ using ArrayTypes = ::testing::Types<
    Vector< float,  Devices::Cuda, long >,
    Vector< double, Devices::Cuda, long >
 #endif
-#ifdef HAVE_MIC
-   ,
-   Vector< float,  Devices::MIC, long >,
-   Vector< double, Devices::MIC, long >
-#endif
 >;
 
 TYPED_TEST_SUITE( ArrayTest, ArrayTypes );
@@ -343,9 +313,9 @@ void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u )
 #ifdef HAVE_CUDA
    u.setSize( 10 );
    using ArrayType = Array< Value, Devices::Cuda, Index >;
-   ArrayType* kernel_u = Devices::Cuda::passToDevice( u );
-   testSetGetElementKernel<<< 1, 16 >>>( kernel_u );
-   Devices::Cuda::freeFromDevice( kernel_u );
+   Pointers::DevicePointer< ArrayType > kernel_u( u );
+   testSetGetElementKernel<<< 1, 16 >>>( &kernel_u.template modifyData< Devices::Cuda >() );
+   cudaDeviceSynchronize();
    TNL_CHECK_CUDA_DEVICE;
    for( int i = 0; i < 10; i++ ) {
       EXPECT_EQ( u.getElement( i ), i );
@@ -353,14 +323,6 @@ void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u )
 #endif
 }
 
-template< typename Value, typename Index >
-void testArrayElementwiseAccess( Array< Value, Devices::MIC, Index >&& u )
-{
-#ifdef HAVE_MIC
-   // TODO
-#endif
-}
-
 TYPED_TEST( ArrayTest, elementwiseAccess )
 {
    using ArrayType = typename TestFixture::ArrayType;
@@ -405,9 +367,10 @@ TYPED_TEST( ArrayTest, containsOnlyValue )
 TYPED_TEST( ArrayTest, comparisonOperator )
 {
    using ArrayType = typename TestFixture::ArrayType;
+   using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >;
 
    ArrayType u( 10 ), v( 10 ), w( 10 );
-   typename ArrayType::HostType u_host( 10 );
+   HostArrayType u_host( 10 );
    for( int i = 0; i < 10; i ++ ) {
       u.setElement( i, i );
       u_host.setElement( i, i );
@@ -460,9 +423,10 @@ TYPED_TEST( ArrayTest, comparisonOperatorWithDifferentType )
 TYPED_TEST( ArrayTest, assignmentOperator )
 {
    using ArrayType = typename TestFixture::ArrayType;
+   using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >;
 
    ArrayType u( 10 ), v( 10 );
-   typename ArrayType::HostType u_host( 10 );
+   HostArrayType u_host( 10 );
    for( int i = 0; i < 10; i++ ) {
       u.setElement( i, i );
       u_host.setElement( i, i );
@@ -489,10 +453,12 @@ template< typename ArrayType,
           typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type >
 void testArrayAssignmentWithDifferentType()
 {
+   using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >;
+
    ArrayType u( 10 );
    Array< short, typename ArrayType::DeviceType, short > v( 10 );
    Array< short, Devices::Host, short > v_host( 10 );
-   typename ArrayType::HostType u_host( 10 );
+   HostArrayType u_host( 10 );
    for( int i = 0; i < 10; i++ ) {
       u.setElement( i, i );
       u_host.setElement( i, i );
diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h
index 35344eecf1041725b33c84664b8bca2769f3d525..e5a9d5a2091781669d81391f89e9097c4f0b36b5 100644
--- a/src/UnitTests/Containers/ArrayViewTest.h
+++ b/src/UnitTests/Containers/ArrayViewTest.h
@@ -41,11 +41,6 @@ struct MyData
    // operator used in tests, not necessary for Array to work
    template< typename T >
    bool operator==( T v ) const { return data == v; }
-
-   static String getType()
-   {
-      return String( "MyData" );
-   }
 };
 
 std::ostream& operator<<( std::ostream& str, const MyData& v )
@@ -98,27 +93,6 @@ using ViewTypes = ::testing::Types<
    ArrayView< float,  Devices::Cuda, long >,
    ArrayView< double, Devices::Cuda, long >,
    ArrayView< MyData, Devices::Cuda, long >
-#endif
-#ifdef HAVE_MIC
-   ,
-   ArrayView< int,    Devices::MIC, short >,
-   ArrayView< long,   Devices::MIC, short >,
-   ArrayView< float,  Devices::MIC, short >,
-   ArrayView< double, Devices::MIC, short >,
-   // TODO: MyData does not work on MIC
-//   ArrayView< MyData, Devices::MIC, short >,
-   ArrayView< int,    Devices::MIC, int >,
-   ArrayView< long,   Devices::MIC, int >,
-   ArrayView< float,  Devices::MIC, int >,
-   ArrayView< double, Devices::MIC, int >,
-   // TODO: MyData does not work on MIC
-//   ArrayView< MyData, Devices::MIC, int >,
-   ArrayView< int,    Devices::MIC, long >,
-   ArrayView< long,   Devices::MIC, long >,
-   ArrayView< float,  Devices::MIC, long >,
-   ArrayView< double, Devices::MIC, long >,
-   // TODO: MyData does not work on MIC
-//   ArrayView< MyData, Devices::MIC, long >,
 #endif
 
    // all ArrayView tests should also work with VectorView
@@ -133,11 +107,6 @@ using ViewTypes = ::testing::Types<
    VectorView< float,  Devices::Cuda, long >,
    VectorView< double, Devices::Cuda, long >
 #endif
-#ifdef HAVE_MIC
-   ,
-   VectorView< float,  Devices::MIC, long >,
-   VectorView< double, Devices::MIC, long >
-#endif
 >;
 
 TYPED_TEST_SUITE( ArrayViewTest, ViewTypes );
@@ -289,14 +258,6 @@ void testArrayViewElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u )
 #endif
 }
 
-template< typename Value, typename Index >
-void testArrayViewElementwiseAccess( Array< Value, Devices::MIC, Index >&& u )
-{
-#ifdef HAVE_MIC
-   // TODO
-#endif
-}
-
 TYPED_TEST( ArrayViewTest, elementwiseAccess )
 {
    using ArrayType = typename TestFixture::ArrayType;
@@ -375,9 +336,10 @@ TYPED_TEST( ArrayViewTest, comparisonOperator )
 {
    using ArrayType = typename TestFixture::ArrayType;
    using ViewType = typename TestFixture::ViewType;
+   using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >;
 
    ArrayType a( 10 ), b( 10 );
-   typename ArrayType::HostType a_host( 10 );
+   HostArrayType a_host( 10 );
    for( int i = 0; i < 10; i ++ ) {
       a.setElement( i, i );
       a_host.setElement( i, i );
@@ -450,9 +412,11 @@ TYPED_TEST( ArrayViewTest, assignmentOperator )
    using ArrayType = typename TestFixture::ArrayType;
    using ViewType = typename TestFixture::ViewType;
    using ConstViewType = VectorView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >;
+   using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >;
+   using HostViewType = typename HostArrayType::ViewType;
 
    ArrayType a( 10 ), b( 10 );
-   typename ArrayType::HostType a_host( 10 );
+   HostArrayType a_host( 10 );
    for( int i = 0; i < 10; i++ ) {
       a.setElement( i, i );
       a_host.setElement( i, i );
@@ -460,7 +424,7 @@ TYPED_TEST( ArrayViewTest, assignmentOperator )
 
    ViewType u = a.getView();
    ViewType v = b.getView();
-   typename ViewType::HostType u_host = a_host.getView();
+   HostViewType u_host = a_host.getView();
 
    v.setValue( 0 );
    v = u;
@@ -496,21 +460,25 @@ template< typename ArrayType,
           typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type >
 void testArrayAssignmentWithDifferentType()
 {
+   using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >;
+
    ArrayType a( 10 );
    Array< short, typename ArrayType::DeviceType, short > b( 10 );
-   Array< short, Devices::Host, short > b_host( 10 );
-   typename ArrayType::HostType a_host( 10 );
+   Array< short, Devices::Sequential, short > b_host( 10 );
+   HostArrayType a_host( 10 );
    for( int i = 0; i < 10; i++ ) {
       a.setElement( i, i );
       a_host.setElement( i, i );
    }
 
    using ViewType = ArrayView< typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >;
+   using HostViewType = typename ViewType::template Self< typename ViewType::ValueType, Devices::Sequential >;
    ViewType u = a.getView();
-   typename ViewType::HostType u_host( a_host );
+   HostViewType u_host( a_host );
    using ShortViewType = ArrayView< short, typename ArrayType::DeviceType, short >;
+   using HostShortViewType = ArrayView< short, Devices::Sequential, short >;
    ShortViewType v( b );
-   typename ShortViewType::HostType v_host( b_host );
+   HostShortViewType v_host( b_host );
 
    v.setValue( 0 );
    v = u;
diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
index c8cd88af9f3ae8df5109c439aba858bc059bca2d..6ff7570dd3b1a62051ce295180205ddac0675156 100644
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -1,17 +1,3 @@
-ADD_EXECUTABLE( ListTest ListTest.cpp )
-TARGET_COMPILE_OPTIONS( ListTest PRIVATE ${CXX_TESTS_FLAGS} )
-TARGET_LINK_LIBRARIES( ListTest ${GTEST_BOTH_LIBRARIES} )
-
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( ArrayOperationsTest ArrayOperationsTest.cu
-                        OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( ArrayOperationsTest ${GTEST_BOTH_LIBRARIES} )
-ELSE( BUILD_CUDA )
-   ADD_EXECUTABLE( ArrayOperationsTest ArrayOperationsTest.cpp )
-   TARGET_COMPILE_OPTIONS( ArrayOperationsTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( ArrayOperationsTest ${GTEST_BOTH_LIBRARIES} )
-ENDIF( BUILD_CUDA )
-
 ADD_EXECUTABLE( ArrayTest ArrayTest.cpp )
 TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} )
@@ -70,16 +56,6 @@ IF( BUILD_CUDA )
    TARGET_LINK_LIBRARIES( VectorVerticalOperationsTestCuda ${GTEST_BOTH_LIBRARIES} )
 ENDIF( BUILD_CUDA )
 
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu
-                        OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} )
-ELSE( BUILD_CUDA )
-   ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp )
-   TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} )
-ENDIF( BUILD_CUDA )
-
 ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp )
 TARGET_COMPILE_OPTIONS( StaticArrayTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( StaticArrayTest ${GTEST_BOTH_LIBRARIES} )
@@ -93,8 +69,6 @@ TARGET_COMPILE_OPTIONS( StaticVectorOperationsTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} )
 
 
-ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( ArrayOperationsTest ${EXECUTABLE_OUTPUT_PATH}/ArrayOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} )
@@ -113,7 +87,6 @@ IF( BUILD_CUDA )
    ADD_TEST( VectorUnaryOperationsTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorUnaryOperationsTestCuda${CMAKE_EXECUTABLE_SUFFIX} )
    ADD_TEST( VectorVerticalOperationsTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorVerticalOperationsTestCuda${CMAKE_EXECUTABLE_SUFFIX} )
 ENDIF()
-ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticVectorOperationsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Containers/DistributedVectorTest.h b/src/UnitTests/Containers/DistributedVectorTest.h
index b1844be3bcabfe421ea5e2b8ace54425ef75be2a..2a1834f318fa616d25a77ccccbdb68bb1cc016a4 100644
--- a/src/UnitTests/Containers/DistributedVectorTest.h
+++ b/src/UnitTests/Containers/DistributedVectorTest.h
@@ -42,18 +42,19 @@ protected:
    using DistributedVectorType = DistributedVector;
    using VectorViewType = typename DistributedVectorType::LocalViewType;
    using DistributedVectorView = Containers::DistributedVectorView< RealType, DeviceType, IndexType, CommunicatorType >;
+   using HostDistributedVectorType = typename DistributedVectorType::template Self< RealType, Devices::Sequential >;
 
    const typename CommunicatorType::CommunicationGroup group = CommunicatorType::AllGroup;
 
    DistributedVectorType v;
    DistributedVectorView v_view;
-   typename DistributedVectorType::HostType v_host;
+   HostDistributedVectorType v_host;
 
    const int rank = CommunicatorType::GetRank(group);
    const int nproc = CommunicatorType::GetSize(group);
 
    // should be small enough to have fast tests, but large enough to test
-   // prefix-sum with multiple CUDA grids
+   // scan with multiple CUDA grids
    const int globalSize = 10000 * nproc;
 
    DistributedVectorTest()
@@ -79,7 +80,7 @@ using DistributedVectorTypes = ::testing::Types<
 
 TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes );
 
-TYPED_TEST( DistributedVectorTest, prefixSum )
+TYPED_TEST( DistributedVectorTest, scan )
 {
    using RealType = typename TestFixture::DistributedVectorType::RealType;
    using DeviceType = typename TestFixture::DistributedVectorType::DeviceType;
@@ -96,21 +97,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
 
    setConstantSequence( v, 0 );
    v_host = -1;
-   v.prefixSum();
+   v.scan();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v.prefixSum();
+   v.scan();
    v_host = v_view;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v.prefixSum();
+   v.scan();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
@@ -118,21 +119,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
    // test views
    setConstantSequence( v, 0 );
    v_host = -1;
-   v_view.prefixSum();
+   v_view.scan();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v_view.prefixSum();
+   v_view.scan();
    v_host = v_view;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v_view.prefixSum();
+   v_view.scan();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
@@ -146,7 +147,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
 
       setConstantSequence( v, 0 );
       v_host = -1;
-      v.prefixSum();
+      v.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -154,7 +155,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v.prefixSum();
+      v.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v_view;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -162,7 +163,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
 
       setLinearSequence( v );
       v_host = -1;
-      v.prefixSum();
+      v.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -171,7 +172,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
       // test views
       setConstantSequence( v, 0 );
       v_host = -1;
-      v_view.prefixSum();
+      v_view.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -179,7 +180,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v_view.prefixSum();
+      v_view.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v_view;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -187,7 +188,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
 
       setLinearSequence( v );
       v_host = -1;
-      v_view.prefixSum();
+      v_view.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -198,7 +199,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum )
    }
 }
 
-TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
+TYPED_TEST( DistributedVectorTest, exclusiveScan )
 {
    using RealType = typename TestFixture::DistributedVectorType::RealType;
    using DeviceType = typename TestFixture::DistributedVectorType::DeviceType;
@@ -215,21 +216,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
 
    setConstantSequence( v, 0 );
    v_host = -1;
-   v.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v_view;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], i ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
@@ -237,21 +238,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
    // test views
    setConstantSequence( v, 0 );
    v_host = -1;
-   v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v_view.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v_view.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v_view;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], i ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v_view.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
       EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
@@ -265,7 +266,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
 
       setConstantSequence( v, 0 );
       v_host = -1;
-      v.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -273,7 +274,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v_view;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -281,7 +282,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
 
       setLinearSequence( v );
       v_host = -1;
-      v.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -290,7 +291,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
       // test views
       setConstantSequence( v, 0 );
       v_host = -1;
-      v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v_view.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -298,7 +299,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v_view.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v_view;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
@@ -306,7 +307,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum )
 
       setLinearSequence( v );
       v_host = -1;
-      v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v_view.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
diff --git a/src/UnitTests/Containers/ListTest.cpp b/src/UnitTests/Containers/ListTest.cpp
deleted file mode 100644
index f24e650b43b681dc82b9571800e0c3201b202504..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/ListTest.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/***************************************************************************
-                          ListTest.cpp  -  description
-                             -------------------
-    begin                : Feb 15, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#ifdef HAVE_GTEST 
-#include <gtest/gtest.h>
-
-#include <TNL/Containers/List.h>
-
-using namespace TNL;
-using namespace TNL::Containers;
-
-// minimal custom data structure usable as ValueType in List
-struct MyData
-{
-   double data;
-
-   __cuda_callable__
-   MyData() : data(0) {}
-
-   template< typename T >
-   __cuda_callable__
-   MyData( T v ) : data(v) {}
-
-   __cuda_callable__
-   bool operator==( const MyData& v ) const { return data == v.data; }
-
-   __cuda_callable__
-   bool operator!=( const MyData& v ) const { return data != v.data; }
-
-   static String getType()
-   {
-      return String( "MyData" );
-   }
-};
-
-std::ostream& operator<<( std::ostream& str, const MyData& v )
-{
-   return str << v.data;
-}
-
-
-// test fixture for typed tests
-template< typename List >
-class ListTest : public ::testing::Test
-{
-protected:
-   using ListType = List;
-};
-
-// types for which ListTest is instantiated
-using ListTypes = ::testing::Types<
-   List< short  >,
-   List< int    >,
-   List< long   >,
-   List< float  >,
-   List< double >,
-   List< MyData >
->;
-
-TYPED_TEST_SUITE( ListTest, ListTypes );
-
-
-TYPED_TEST( ListTest, constructor )
-{
-   using ListType = typename TestFixture::ListType;
-   using ValueType = typename ListType::ValueType;
-
-   ListType list;
-   EXPECT_TRUE( list.isEmpty() );
-   EXPECT_EQ( list.getSize(), 0 );
-
-   list.Append( ( ValueType ) 0 );
-   EXPECT_EQ( list.getSize(), 1 );
-
-   ListType copy( list );
-   list.Append( ( ValueType ) 0 );
-   EXPECT_EQ( list.getSize(), 2 );
-   EXPECT_EQ( copy.getSize(), 1 );
-   EXPECT_EQ( copy[ 0 ], list[ 0 ] );
-}
-
-TYPED_TEST( ListTest, operations )
-{
-   using ListType = typename TestFixture::ListType;
-   using ValueType = typename ListType::ValueType;
-
-   ListType a, b;
-
-   a.Append( (ValueType) 0 );
-   a.Append( (ValueType) 1 );
-   a.Prepend( (ValueType) 2 );
-   a.Insert( (ValueType) 3, 1 );
-   EXPECT_EQ( a.getSize(), 4 );
-   EXPECT_EQ( a[ 0 ], (ValueType) 2 );
-   EXPECT_EQ( a[ 1 ], (ValueType) 3 );
-   EXPECT_EQ( a[ 2 ], (ValueType) 0 );
-   EXPECT_EQ( a[ 3 ], (ValueType) 1 );
-
-   b = a;
-   EXPECT_EQ( b.getSize(), 4 );
-   EXPECT_EQ( a, b );
-
-   b.Insert( ( ValueType ) 4, 4 );
-   EXPECT_NE( a, b );
-   EXPECT_EQ( b[ 4 ], (ValueType) 4 );
-
-   a.AppendList( b );
-   EXPECT_EQ( a.getSize(), 9 );
-   EXPECT_EQ( a[ 0 ], (ValueType) 2 );
-   EXPECT_EQ( a[ 1 ], (ValueType) 3 );
-   EXPECT_EQ( a[ 2 ], (ValueType) 0 );
-   EXPECT_EQ( a[ 3 ], (ValueType) 1 );
-   EXPECT_EQ( a[ 4 ], (ValueType) 2 );
-   EXPECT_EQ( a[ 5 ], (ValueType) 3 );
-   EXPECT_EQ( a[ 6 ], (ValueType) 0 );
-   EXPECT_EQ( a[ 7 ], (ValueType) 1 );
-   EXPECT_EQ( a[ 8 ], (ValueType) 4 );
-
-   a.PrependList( b );
-   EXPECT_EQ( a.getSize(), 14 );
-   EXPECT_EQ( a[ 0 ],  (ValueType) 2 );
-   EXPECT_EQ( a[ 1 ],  (ValueType) 3 );
-   EXPECT_EQ( a[ 2 ],  (ValueType) 0 );
-   EXPECT_EQ( a[ 3 ],  (ValueType) 1 );
-   EXPECT_EQ( a[ 4 ],  (ValueType) 4 );
-   EXPECT_EQ( a[ 5 ],  (ValueType) 2 );
-   EXPECT_EQ( a[ 6 ],  (ValueType) 3 );
-   EXPECT_EQ( a[ 7 ],  (ValueType) 0 );
-   EXPECT_EQ( a[ 8 ],  (ValueType) 1 );
-   EXPECT_EQ( a[ 9 ],  (ValueType) 2 );
-   EXPECT_EQ( a[ 10 ], (ValueType) 3 );
-   EXPECT_EQ( a[ 11 ], (ValueType) 0 );
-   EXPECT_EQ( a[ 12 ], (ValueType) 1 );
-   EXPECT_EQ( a[ 13 ], (ValueType) 4 );
-}
-#endif
-
-
-#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
index 93283483c8a06e89c6e9dfceee81f82e391c5955..bae5ce5f389e703711df651084994c8f39775116 100644
--- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h
+++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
@@ -595,8 +595,11 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices )
 {
    SETUP_BINARY_TEST_ALIASES;
 
-   typename TestFixture::RightVector::HostType _R1_h; _R1_h = this->_R1;
-   typename TestFixture::Right::HostType R1_h( _R1_h );
+   using RightHostVector = typename TestFixture::RightVector::Self< typename TestFixture::RightVector::RealType, Devices::Sequential >;
+   using RightHost = typename TestFixture::Right::Self< typename TestFixture::Right::RealType, Devices::Sequential >;
+
+   RightHostVector _R1_h; _R1_h = this->_R1;
+   RightHost R1_h( _R1_h );
 
    // L1 and L2 are device vectors
    EXPECT_EQ( L1, R1_h );
diff --git a/src/UnitTests/Containers/VectorHelperFunctions.h b/src/UnitTests/Containers/VectorHelperFunctions.h
index b1a596c6a4502effce2f9e0a593edf3a25e4ea23..4e8c64faee6b906076bfcb55aabab9c46960eaf1 100644
--- a/src/UnitTests/Containers/VectorHelperFunctions.h
+++ b/src/UnitTests/Containers/VectorHelperFunctions.h
@@ -9,7 +9,8 @@ void setLinearSequence( Vector& deviceVector )
 #ifdef STATIC_VECTOR
    Vector a;
 #else
-   typename Vector::HostType a;
+   using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >;
+   HostVector a;
    a.setLike( deviceVector );
 #endif
 #ifdef DISTRIBUTED_VECTOR
@@ -34,7 +35,8 @@ void setConstantSequence( Vector& deviceVector,
 template< typename Vector >
 void setOscilatingLinearSequence( Vector& deviceVector )
 {
-   typename Vector::HostType a;
+   using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >;
+   HostVector a;
    a.setLike( deviceVector );
    for( int i = 0; i < a.getSize(); i++ )
       a[ i ] = i % 30 - 15;
@@ -45,7 +47,8 @@ template< typename Vector >
 void setOscilatingConstantSequence( Vector& deviceVector,
                                     typename Vector::RealType v )
 {
-   typename Vector::HostType a;
+   using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >;
+   HostVector a;
    a.setLike( deviceVector );
    for( int i = 0; i < a.getSize(); i++ )
       a[ i ] = TNL::sign( i % 30 - 15 );
@@ -55,7 +58,8 @@ void setOscilatingConstantSequence( Vector& deviceVector,
 template< typename Vector >
 void setNegativeLinearSequence( Vector& deviceVector )
 {
-   typename Vector::HostType a;
+   using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >;
+   HostVector a;
    a.setLike( deviceVector );
 #ifdef DISTRIBUTED_VECTOR
    for( int i = 0; i < a.getLocalView().getSize(); i++ ) {
@@ -76,7 +80,8 @@ void setOscilatingSequence( Vector& deviceVector,
 #ifdef STATIC_VECTOR
    Vector a;
 #else
-   typename Vector::HostType a;
+   using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >;
+   HostVector a;
    a.setLike( deviceVector );
 #endif
 #ifdef DISTRIBUTED_VECTOR
diff --git a/src/UnitTests/Containers/VectorPrefixSumTest.h b/src/UnitTests/Containers/VectorPrefixSumTest.h
index be295001ade53ab0be2b1afd9ce8950b333cbf4c..7f2151c5ef15429d549020d076e4fd99681c3b8f 100644
--- a/src/UnitTests/Containers/VectorPrefixSumTest.h
+++ b/src/UnitTests/Containers/VectorPrefixSumTest.h
@@ -17,13 +17,14 @@
 // and large enough to require multiple CUDA blocks for reduction
 constexpr int VECTOR_TEST_SIZE = 10000;
 
-TYPED_TEST( VectorTest, prefixSum )
+TYPED_TEST( VectorTest, scan )
 {
    using VectorType = typename TestFixture::VectorType;
    using ViewType = typename TestFixture::ViewType;
    using RealType = typename VectorType::RealType;
    using DeviceType = typename VectorType::DeviceType;
    using IndexType = typename VectorType::IndexType;
+   using HostVectorType = typename VectorType::template Self< RealType, Devices::Sequential >;
    const int size = VECTOR_TEST_SIZE;
 
    // FIXME: tests should work in all cases
@@ -32,25 +33,25 @@ TYPED_TEST( VectorTest, prefixSum )
 
    VectorType v( size );
    ViewType v_view( v );
-   typename VectorType::HostType v_host( size );
+   HostVectorType v_host( size );
 
    setConstantSequence( v, 0 );
    v_host = -1;
-   v.prefixSum();
+   v.scan();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v.prefixSum();
+   v.scan();
    v_host = v_view;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v.prefixSum();
+   v.scan();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
@@ -58,21 +59,21 @@ TYPED_TEST( VectorTest, prefixSum )
    // test views
    setConstantSequence( v, 0 );
    v_host = -1;
-   v_view.prefixSum();
+   v_view.scan();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v_view.prefixSum();
+   v_view.scan();
    v_host = v_view;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v_view.prefixSum();
+   v_view.scan();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
@@ -86,7 +87,7 @@ TYPED_TEST( VectorTest, prefixSum )
 
       setConstantSequence( v, 0 );
       v_host = -1;
-      v.prefixSum();
+      v.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -94,7 +95,7 @@ TYPED_TEST( VectorTest, prefixSum )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v.prefixSum();
+      v.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v_view;
       for( int i = 0; i < size; i++ )
@@ -102,7 +103,7 @@ TYPED_TEST( VectorTest, prefixSum )
 
       setLinearSequence( v );
       v_host = -1;
-      v.prefixSum();
+      v.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -111,7 +112,7 @@ TYPED_TEST( VectorTest, prefixSum )
       // test views
       setConstantSequence( v, 0 );
       v_host = -1;
-      v_view.prefixSum();
+      v_view.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -119,7 +120,7 @@ TYPED_TEST( VectorTest, prefixSum )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v_view.prefixSum();
+      v_view.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v_view;
       for( int i = 0; i < size; i++ )
@@ -127,7 +128,7 @@ TYPED_TEST( VectorTest, prefixSum )
 
       setLinearSequence( v );
       v_host = -1;
-      v_view.prefixSum();
+      v_view.scan();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -145,6 +146,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
    using RealType = typename VectorType::RealType;
    using DeviceType = typename VectorType::DeviceType;
    using IndexType = typename VectorType::IndexType;
+   using HostVectorType = typename VectorType::template Self< RealType, Devices::Sequential >;
    const int size = VECTOR_TEST_SIZE;
 
    // FIXME: tests should work in all cases
@@ -154,25 +156,25 @@ TYPED_TEST( VectorTest, exclusiveScan )
    VectorType v;
    v.setSize( size );
    ViewType v_view( v );
-   typename VectorType::HostType v_host( size );
+   HostVectorType v_host( size );
 
    setConstantSequence( v, 0 );
    v_host = -1;
-   v.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], i ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
@@ -180,21 +182,21 @@ TYPED_TEST( VectorTest, exclusiveScan )
    // test views
    setConstantSequence( v, 0 );
    v_host = -1;
-   v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v_view.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
 
    setConstantSequence( v, 1 );
    v_host = -1;
-   v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v_view.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], i ) << "i = " << i;
 
    setLinearSequence( v );
    v_host = -1;
-   v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+   v_view.template scan< Algorithms::ScanType::Exclusive >();
    v_host = v;
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
@@ -208,7 +210,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
 
       setConstantSequence( v, 0 );
       v_host = -1;
-      v.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -216,7 +218,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -224,7 +226,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
 
       setLinearSequence( v );
       v_host = -1;
-      v.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -233,7 +235,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
       // test views
       setConstantSequence( v, 0 );
       v_host = -1;
-      v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v_view.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -241,7 +243,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
 
       setConstantSequence( v, 1 );
       v_host = -1;
-      v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v_view.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -249,7 +251,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
 
       setLinearSequence( v );
       v_host = -1;
-      v_view.template prefixSum< Algorithms::ScanType::Exclusive >();
+      v_view.template scan< Algorithms::ScanType::Exclusive >();
       EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
       v_host = v;
       for( int i = 0; i < size; i++ )
@@ -260,7 +262,7 @@ TYPED_TEST( VectorTest, exclusiveScan )
    }
 }
 
-// TODO: test prefix sum with custom begin and end parameters
+// TODO: test scan with custom begin and end parameters
 
 
 template< typename FlagsView >
diff --git a/src/UnitTests/Containers/VectorTestSetup.h b/src/UnitTests/Containers/VectorTestSetup.h
index 5c342dced87f713824344cb43ee1c9922dbf0ef6..c8ec42bea482a1691fca97efecb8342985f8207d 100644
--- a/src/UnitTests/Containers/VectorTestSetup.h
+++ b/src/UnitTests/Containers/VectorTestSetup.h
@@ -76,21 +76,6 @@ using VectorTypes = ::testing::Types<
    //Vector< Quad< float >,  Devices::Cuda, long >,
    //Vector< Quad< double >, Devices::Cuda, long >
 #endif
-#ifdef HAVE_MIC
-   ,
-   Vector< int,    Devices::MIC, short >,
-   Vector< long,   Devices::MIC, short >,
-   Vector< float,  Devices::MIC, short >,
-   Vector< double, Devices::MIC, short >,
-   Vector< int,    Devices::MIC, int >,
-   Vector< long,   Devices::MIC, int >,
-   Vector< float,  Devices::MIC, int >,
-   Vector< double, Devices::MIC, int >,
-   Vector< int,    Devices::MIC, long >,
-   Vector< long,   Devices::MIC, long >,
-   Vector< float,  Devices::MIC, long >,
-   Vector< double, Devices::MIC, long >
-#endif
 >;
 
 TYPED_TEST_SUITE( VectorTest, VectorTypes );
diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.h b/src/UnitTests/Containers/VectorUnaryOperationsTest.h
index 1224042532b2c55eb33757e61fe603219840aa61..827147cd53552209f9875370b93ca75d7514728a 100644
--- a/src/UnitTests/Containers/VectorUnaryOperationsTest.h
+++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.h
@@ -168,14 +168,16 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes );
       using VectorOrView = typename TestFixture::VectorOrView; \
       using RealType = typename VectorType::RealType;          \
       using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \
+      using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \
+      using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \
       constexpr int size = _size;                              \
       using CommunicatorType = typename VectorOrView::CommunicatorType; \
       const auto group = CommunicatorType::AllGroup; \
       using LocalRangeType = typename VectorOrView::LocalRangeType; \
       const LocalRangeType localRange = Partitioner< typename VectorOrView::IndexType, CommunicatorType >::splitRange( size, group ); \
                                                                \
-      typename VectorType::HostType _V1h;                      \
-      typename ExpectedVector::HostType expected_h;            \
+      HostVector _V1h;                                         \
+      HostExpectedVector expected_h;                           \
       _V1h.setDistribution( localRange, size, group );         \
       expected_h.setDistribution( localRange, size, group );   \
                                                                \
@@ -209,10 +211,12 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes );
       using VectorOrView = typename TestFixture::VectorOrView; \
       using RealType = typename VectorType::RealType;          \
       using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \
+      using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \
+      using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \
       constexpr int size = _size;                              \
                                                                \
-      typename VectorType::HostType _V1h( size );              \
-      typename ExpectedVector::HostType expected_h( size );    \
+      HostVector _V1h( size );                                 \
+      HostExpectedVector expected_h( size );                   \
                                                                \
       const double h = (double) (end - begin) / size;          \
       for( int i = 0; i < size; i++ )                          \
@@ -254,8 +258,8 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 )
    using LeftVector = Vector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType >;
    using RightVector = Vector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType >;
 #endif
-   using LeftHostVector = typename LeftVector::HostType;
-   using RightHostVector = typename RightVector::HostType;
+   using LeftHostVector = typename LeftVector::template Self< LeftNonConstReal, Devices::Sequential >;
+   using RightHostVector = typename RightVector::template Self< RightNonConstReal, Devices::Sequential >;
 
    // first evaluate expressions
    LeftVector v1; v1 = _v1;
diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h
index 2faf5ba040b71d8003ceb7629a1b9fa7a80870ef..04afb91a4073750ba5583465b85eeda2239f0628 100644
--- a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h
+++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h
@@ -139,7 +139,7 @@ void test_helper_setValue( DistributedArray& array, BufferView& buffer_view )
    {
       buffer_view[ i - localRange.getBegin() ] = array_view( i );
    };
-   ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel );
 }
 
 TYPED_TEST( DistributedNDArray_1D_test, setValue )
@@ -224,7 +224,7 @@ void test_helper_comparisonOperators( DistributedArray& u, DistributedArray& v,
       v_view( gi ) = gi;
       w_view( gi ) = 2 * gi;
    };
-   ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel );
+   Algorithms::ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel );
 }
 
 TYPED_TEST( DistributedNDArray_1D_test, comparisonOperators )
diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h
index 33390a33c8a230d2946f54569e211a4a711713d0..17108509d037be69b78ce7a13bc54edbf37731b3 100644
--- a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h
+++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h
@@ -181,7 +181,7 @@ void test_helper_comparisonOperators( DistributedArray& u, DistributedArray& v,
       v_view( q, gi, j ) = gi;
       w_view( q, gi, j ) = 2 * gi;
    };
-   ParallelFor3D< DeviceType >::exec( (IndexType) 0, localRange.getBegin(), (IndexType) 0,
+   Algorithms::ParallelFor3D< DeviceType >::exec( (IndexType) 0, localRange.getBegin(), (IndexType) 0,
                                       9, localRange.getEnd(), u.template getSize< 2 >(),
                                       kernel );
 }
diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu
index 0a0a83dd83fae72ff0f1b5c349d81ba05ed0da65..5a0561955f85cce3fac6798cd480466e495cc181 100644
--- a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu
+++ b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu
@@ -2,8 +2,9 @@
 
 #include <TNL/Containers/NDArray.h>
 
-#include <TNL/ParallelFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
+using namespace TNL;
 using namespace TNL::Containers;
 using std::index_sequence;
 
@@ -37,7 +38,7 @@ void __test_SetThroughView()
     };
 
     a.setValue(0);
-    TNL::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view );
+    Algorithms::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view );
     expect_identity( a.getStorageArray() );
 }
 TEST( StaticNDArrayCudaTest, SetThroughView )
@@ -68,7 +69,7 @@ void __test_CopyFromArray()
     };
 
     a.setValue(0);
-    TNL::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view );
+    Algorithms::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view );
     expect_identity( a.getStorageArray() );
 }
 TEST( StaticNDArrayCudaTest, CopyFromArray )
diff --git a/src/UnitTests/FileTest.h b/src/UnitTests/FileTest.h
index f376c60f4242922a407ede2063b420b7eb9f4b32..b9f2ee7ef1f73f65ab231583f155f138d8d10626 100644
--- a/src/UnitTests/FileTest.h
+++ b/src/UnitTests/FileTest.h
@@ -61,15 +61,15 @@ TEST( FileTest, WriteAndReadWithConversion )
    int intData[ 3 ];
    File file;
    ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::out | std::ios_base::trunc ) );
-   file.save< double, float, Devices::Host >( doubleData, 3 );
+   file.save< double, float >( doubleData, 3 );
    ASSERT_NO_THROW( file.close() );
 
    ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::in ) );
-   file.load< float, float, Devices::Host >( floatData, 3 );
+   file.load< float, float >( floatData, 3 );
    ASSERT_NO_THROW( file.close() );
 
    ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::in ) );
-   file.load< int, float, Devices::Host >( intData, 3 );
+   file.load< int, float >( intData, 3 );
    ASSERT_NO_THROW( file.close() );
 
    EXPECT_NEAR( floatData[ 0 ], 3.14159, 0.0001 );
@@ -112,9 +112,9 @@ TEST( FileTest, WriteAndReadCUDA )
    File file;
    ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out ) );
 
-   file.save< int, int, Devices::Cuda >( cudaIntData );
-   file.save< float, float, Devices::Cuda >( cudaFloatData, 3 );
-   file.save< const double, double, Devices::Cuda >( cudaConstDoubleData );
+   file.save< int, int, Allocators::Cuda<int> >( cudaIntData );
+   file.save< float, float, Allocators::Cuda<float> >( cudaFloatData, 3 );
+   file.save< const double, double, Allocators::Cuda<const double> >( cudaConstDoubleData );
    ASSERT_NO_THROW( file.close() );
 
    ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) );
@@ -127,9 +127,9 @@ TEST( FileTest, WriteAndReadCUDA )
    cudaMalloc( ( void** ) &newCudaIntData, sizeof( int ) );
    cudaMalloc( ( void** ) &newCudaFloatData, 3 * sizeof( float ) );
    cudaMalloc( ( void** ) &newCudaDoubleData, sizeof( double ) );
-   file.load< int, int, Devices::Cuda >( newCudaIntData, 1 );
-   file.load< float, float, Devices::Cuda >( newCudaFloatData, 3 );
-   file.load< double, double, Devices::Cuda >( newCudaDoubleData, 1 );
+   file.load< int, int, Allocators::Cuda<int> >( newCudaIntData, 1 );
+   file.load< float, float, Allocators::Cuda<float> >( newCudaFloatData, 3 );
+   file.load< double, double, Allocators::Cuda<double> >( newCudaDoubleData, 1 );
    cudaMemcpy( &newIntData,
                newCudaIntData,
                sizeof( int ),
@@ -172,15 +172,15 @@ TEST( FileTest, WriteAndReadCUDAWithConversion )
 
    File file;
    ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out | std::ios_base::trunc ) );
-   file.save< double, float, Devices::Cuda >( cudaConstDoubleData, 3 );
+   file.save< double, float, Allocators::Cuda<double> >( cudaConstDoubleData, 3 );
    ASSERT_NO_THROW( file.close() );
 
    ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) );
-   file.load< float, float, Devices::Cuda >( cudaFloatData, 3 );
+   file.load< float, float, Allocators::Cuda<float> >( cudaFloatData, 3 );
    ASSERT_NO_THROW( file.close() );
 
    ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) );
-   file.load< int, float, Devices::Cuda >( cudaIntData, 3 );
+   file.load< int, float, Allocators::Cuda<int> >( cudaIntData, 3 );
    ASSERT_NO_THROW( file.close() );
 
    cudaMemcpy( floatData,
diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h
index 7c74e77040e8d554b9ca641d0b9fb9f1e18dc741..93673a29063db52afcabf59165ea5949471cf1bc 100644
--- a/src/UnitTests/Matrices/DistributedMatrixTest.h
+++ b/src/UnitTests/Matrices/DistributedMatrixTest.h
@@ -6,10 +6,22 @@
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+#include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/NoDistrCommunicator.h>
+#include <TNL/Matrices/DistributedMatrix.h>
+#include <TNL/Containers/Partitioner.h>
+#include <TNL/Matrices/CSR.h>
+
+using namespace TNL;
+
 template< typename Vector >
 void setLinearSequence( Vector& deviceVector, typename Vector::RealType offset = 0 )
 {
-   typename Vector::HostType a;
+   using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Sequential >;
+   HostVector a;
    a.setLike( deviceVector );
    for( int i = 0; i < a.getLocalView().getSize(); i++ ) {
       const auto gi = a.getLocalRange().getGlobalIndex( i );
@@ -21,8 +33,11 @@ void setLinearSequence( Vector& deviceVector, typename Vector::RealType offset =
 template< typename Matrix, typename RowLengths >
 void setMatrix( Matrix& matrix, const RowLengths& rowLengths )
 {
-   typename Matrix::HostType hostMatrix;
-   typename RowLengths::HostType hostRowLengths;
+   using HostMatrix = Matrices::DistributedMatrix< typename Matrix::MatrixType::template Self< typename Matrix::RealType, TNL::Devices::Sequential >, typename Matrix::CommunicatorType >;
+   using HostRowLengths = typename RowLengths::template Self< typename RowLengths::RealType, TNL::Devices::Sequential >;
+
+   HostMatrix hostMatrix;
+   HostRowLengths hostRowLengths;
    hostMatrix.setLike( matrix );
    hostRowLengths = rowLengths;
    hostMatrix.setCompressedRowLengths( hostRowLengths );
@@ -36,17 +51,6 @@ void setMatrix( Matrix& matrix, const RowLengths& rowLengths )
    matrix = hostMatrix;
 }
 
-#ifdef HAVE_GTEST
-#include <gtest/gtest.h>
-
-#include <TNL/Communicators/MpiCommunicator.h>
-#include <TNL/Communicators/NoDistrCommunicator.h>
-#include <TNL/Matrices/DistributedMatrix.h>
-#include <TNL/Containers/Partitioner.h>
-#include <TNL/Matrices/CSR.h>
-
-using namespace TNL;
-
 /*
  * Light check of DistributedMatrix.
  *
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 9de7b70a281d346d80037761ae0633c09450e227..03b80259d502cf43eb05f7c6b14053aa2e4ed7d7 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -10,12 +10,6 @@
 
 // TODO
 /*
- * getType()                        ::HOW?  How to test this for each format? edit string how?
- *      Found the mistake for Cuda instead of Devices::Cuda. Incorrect String in src/TNL/Devices/Cuda.cpp
- *      MISSING: indexType is missing in CSR_impl.h
- * getTypeVirtual()                 ::TEST? This just calls getType().
- * getSerializationType()           ::TEST? This just calls HostType::getType().
- * getSerializationTypeVirtual()    ::TEST? This just calls getSerializationType().
  * setDimensions()                      ::DONE
  * setCompressedRowLengths()            ::DONE
  * getRowLength()                   ::USED! In test_SetCompressedRowLengths() to verify the test itself.
diff --git a/src/UnitTests/Meshes/BoundaryTagsTest.h b/src/UnitTests/Meshes/BoundaryTagsTest.h
index b7eccf5f24b16927a4cd211b876f24bd5d9512ed..6bc07adabac4a0e1470c13238acf947f1b97f8fb 100644
--- a/src/UnitTests/Meshes/BoundaryTagsTest.h
+++ b/src/UnitTests/Meshes/BoundaryTagsTest.h
@@ -37,7 +37,8 @@ TEST( MeshTest, RegularMeshOfQuadrilateralsTest )
    using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType;
 
    using PointType = typename VertexMeshEntityType::PointType;
-   ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    const IndexType xSize( 3 ), ySize( 4 );
    const RealType width( 1.0 ), height( 1.0 );
diff --git a/src/UnitTests/Meshes/CMakeLists.txt b/src/UnitTests/Meshes/CMakeLists.txt
index c71bde352dc80832dc9dce44a896905372635579..91bf37215b772df499a4ecc7d32cd4fdfe335f05 100644
--- a/src/UnitTests/Meshes/CMakeLists.txt
+++ b/src/UnitTests/Meshes/CMakeLists.txt
@@ -10,6 +10,10 @@ if( ${BUILD_CUDA} AND ${CUDA_VERSION_MAJOR} GREATER_EQUAL 9 )
                         OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( MeshTest ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( MeshTraverserTest MeshTraverserTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MeshTraverserTest ${GTEST_BOTH_LIBRARIES} )
+
    CUDA_ADD_EXECUTABLE( MeshOrderingTest MeshOrderingTest.cu
                         OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( MeshOrderingTest ${GTEST_BOTH_LIBRARIES} )
@@ -18,6 +22,10 @@ else()
    TARGET_COMPILE_OPTIONS( MeshTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( MeshTest ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( MeshTraverserTest MeshTraverserTest.cpp )
+   TARGET_COMPILE_OPTIONS( MeshTraverserTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MeshTraverserTest ${GTEST_BOTH_LIBRARIES} )
+
    ADD_EXECUTABLE( MeshOrderingTest MeshOrderingTest.cpp )
    TARGET_COMPILE_OPTIONS( MeshOrderingTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( MeshOrderingTest ${GTEST_BOTH_LIBRARIES} )
@@ -30,6 +38,7 @@ TARGET_LINK_LIBRARIES( MeshEntityTest ${GTEST_BOTH_LIBRARIES} )
 
 ADD_TEST( BoundaryTagsTest ${EXECUTABLE_OUTPUT_PATH}/BoundaryTagsTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( MeshTest ${EXECUTABLE_OUTPUT_PATH}/MeshTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( MeshTraverserTest ${EXECUTABLE_OUTPUT_PATH}/MeshTraverserTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( MeshOrderingTest ${EXECUTABLE_OUTPUT_PATH}/MeshOrderingTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( MeshEntityTest ${EXECUTABLE_OUTPUT_PATH}/MeshEntityTest${CMAKE_EXECUTABLE_SUFFIX} )
 
diff --git a/src/UnitTests/Meshes/MeshEntityTest.h b/src/UnitTests/Meshes/MeshEntityTest.h
index 5360fd6a8b9151972c798f8f2abb97bf6e31d6eb..235150d9d26748a7b95af60fb79471ee8e3986da 100644
--- a/src/UnitTests/Meshes/MeshEntityTest.h
+++ b/src/UnitTests/Meshes/MeshEntityTest.h
@@ -114,7 +114,8 @@ TEST( MeshEntityTest, VertexMeshEntityTest )
    using VertexMeshEntityType = TestMeshEntity< TestEdgeMeshConfig, typename EdgeMeshEntityType::SubentityTraits< 0 >::SubentityTopology >;
 
    using PointType = typename VertexMeshEntityType::PointType;
-   EXPECT_EQ( PointType::getType(),  ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    VertexMeshEntityType vertexEntity;
    PointType point;
@@ -131,7 +132,8 @@ TEST( MeshEntityTest, EdgeMeshEntityTest )
    static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." );
 
    using PointType = typename VertexMeshEntityType::PointType;
-   EXPECT_EQ( PointType::getType(),  ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    /****
     *
@@ -211,7 +213,8 @@ TEST( MeshEntityTest, TriangleMeshEntityTest )
    static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." );
 
    using PointType = typename VertexMeshEntityType::PointType;
-   EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    /****
     * We set-up the same situation as in the test above
@@ -293,7 +296,8 @@ TEST( MeshEntityTest, TetrahedronMeshEntityTest )
    static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." );
 
    using PointType = typename VertexMeshEntityType::PointType;
-   EXPECT_EQ( PointType::getType(),  ( Containers::StaticVector< 3, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value,
+                  "unexpected PointType" );
 
    /****
     * We set-up similar situation as above but with
@@ -457,7 +461,8 @@ TEST( MeshEntityTest, TwoTrianglesMeshEntityTest )
    static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." );
 
    using PointType = typename VertexMeshEntityType::PointType;
-   EXPECT_EQ( PointType::getType(),  ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    /****
     * We set-up the following situation
@@ -647,7 +652,8 @@ TEST( MeshEntityTest, OneTriangleComparisonTest )
    static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." );
 
    using PointType = typename VertexMeshEntityType::PointType;
-   EXPECT_EQ( PointType::getType(),  ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    PointType point0( 0.0, 0.0 ),
              point1( 1.0, 0.0 ),
diff --git a/src/UnitTests/Meshes/MeshTest.h b/src/UnitTests/Meshes/MeshTest.h
index 352a2d791395cd16ce8b2afeff628461abe0b27c..5c95221ed8ad611f61265209fcc5d25b7cd5bb59 100644
--- a/src/UnitTests/Meshes/MeshTest.h
+++ b/src/UnitTests/Meshes/MeshTest.h
@@ -180,7 +180,8 @@ TEST( MeshTest, TwoTrianglesTest )
    static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." );
 
    using PointType = typename VertexMeshEntityType::PointType;
-   ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    /****
     * We set-up the following situation
@@ -289,7 +290,8 @@ TEST( MeshTest, TetrahedronsTest )
    using VertexMeshEntityType = typename TetrahedronMeshEntityType::SubentityTraits< 0 >::SubentityType;
 
    using PointType = typename VertexMeshEntityType::PointType;
-   ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 3, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value,
+                  "unexpected PointType" );
 
    typedef Mesh< TestTetrahedronMeshConfig > TestTetrahedronMesh;
    TestTetrahedronMesh mesh;
@@ -454,7 +456,8 @@ TEST( MeshTest, RegularMeshOfTrianglesTest )
    using VertexMeshEntityType = typename TriangleMeshEntityType::SubentityTraits< 0 >::SubentityType;
 
    using PointType = typename VertexMeshEntityType::PointType;
-   ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    const IndexType xSize( 5 ), ySize( 5 );
    const RealType width( 1.0 ), height( 1.0 );
@@ -554,7 +557,8 @@ TEST( MeshTest, RegularMeshOfQuadrilateralsTest )
    using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType;
 
    using PointType = typename VertexMeshEntityType::PointType;
-   ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
 
    const IndexType xSize( 3 ), ySize( 4 );
    const RealType width( 1.0 ), height( 1.0 );
@@ -652,7 +656,8 @@ TEST( MeshTest, RegularMeshOfHexahedronsTest )
    using VertexMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 0 >::SubentityType;
 
    using PointType = typename VertexMeshEntityType::PointType;
-   ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 3, RealType >::getType() ) );
+   static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value,
+                  "unexpected PointType" );
 
    const IndexType xSize( 3 ), ySize( 4 ), zSize( 5 );
    const RealType width( 1.0 ), height( 1.0 ), depth( 1.0 );
diff --git a/src/UnitTests/Meshes/MeshTraverserTest.cpp b/src/UnitTests/Meshes/MeshTraverserTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..426d5fef5de10ddc6dfdc795435c57b1c7be5b5c
--- /dev/null
+++ b/src/UnitTests/Meshes/MeshTraverserTest.cpp
@@ -0,0 +1 @@
+#include "MeshTraverserTest.h"
diff --git a/src/UnitTests/Meshes/MeshTraverserTest.cu b/src/UnitTests/Meshes/MeshTraverserTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..426d5fef5de10ddc6dfdc795435c57b1c7be5b5c
--- /dev/null
+++ b/src/UnitTests/Meshes/MeshTraverserTest.cu
@@ -0,0 +1 @@
+#include "MeshTraverserTest.h"
diff --git a/src/UnitTests/Meshes/MeshTraverserTest.h b/src/UnitTests/Meshes/MeshTraverserTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..b6c8208ad9e48731c45b2a6875fe722594a6ffe7
--- /dev/null
+++ b/src/UnitTests/Meshes/MeshTraverserTest.h
@@ -0,0 +1,434 @@
+#pragma once
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+#include <TNL/Meshes/Mesh.h>
+#include <TNL/Meshes/MeshEntity.h>
+#include <TNL/Meshes/DefaultConfig.h>
+#include <TNL/Meshes/Topologies/Quadrilateral.h>
+#include <TNL/Meshes/Topologies/Hexahedron.h>
+#include <TNL/Meshes/MeshBuilder.h>
+#include <TNL/Meshes/Traverser.h>
+
+namespace MeshTest {
+
+using namespace TNL;
+using namespace TNL::Meshes;
+
+using RealType = double;
+using Device = Devices::Host;
+using IndexType = int;
+
+static const char* TEST_FILE_NAME = "test_MeshTest.tnl";
+
+// FIXME: Traverser does not work with Id = void
+//class TestQuadrilateralMeshConfig : public DefaultConfig< Topologies::Quadrilateral >
+class TestQuadrilateralMeshConfig : public DefaultConfig< Topologies::Quadrilateral, 2, double, int, int, int >
+{
+public:
+   static constexpr bool entityStorage( int dimensions ) { return true; }
+   template< typename EntityTopology > static constexpr bool subentityStorage( EntityTopology, int SubentityDimensions ) { return true; }
+   template< typename EntityTopology > static constexpr bool subentityOrientationStorage( EntityTopology, int SubentityDimensions ) { return ( SubentityDimensions % 2 != 0 ); }
+   template< typename EntityTopology > static constexpr bool superentityStorage( EntityTopology, int SuperentityDimensions ) { return true; }
+};
+
+// FIXME: Traverser does not work with Id = void
+//class TestHexahedronMeshConfig : public DefaultConfig< Topologies::Hexahedron >
+class TestHexahedronMeshConfig : public DefaultConfig< Topologies::Hexahedron, 3, double, int, int, int >
+{
+public:
+   static constexpr bool entityStorage( int dimensions ) { return true; }
+   template< typename EntityTopology > static constexpr bool subentityStorage( EntityTopology, int SubentityDimensions ) { return true; }
+   template< typename EntityTopology > static constexpr bool subentityOrientationStorage( EntityTopology, int SubentityDimensions ) { return ( SubentityDimensions % 2 != 0 ); }
+   template< typename EntityTopology > static constexpr bool superentityStorage( EntityTopology, int SuperentityDimensions ) { return true; }
+};
+
+struct TestEntitiesProcessor
+{
+   template< typename Mesh, typename UserData, typename Entity >
+   __cuda_callable__
+   static void processEntity( const Mesh& mesh, UserData& userData, const Entity& entity )
+   {
+      userData[ entity.getIndex() ] += 1;
+   }
+};
+
+template< typename EntityType, typename DeviceMeshPointer, typename HostArray >
+void testCudaTraverser( const DeviceMeshPointer& deviceMeshPointer,
+                        const HostArray& host_array_boundary,
+                        const HostArray& host_array_interior,
+                        const HostArray& host_array_all )
+{
+   using MeshType = typename DeviceMeshPointer::ObjectType;
+   Traverser< MeshType, EntityType > traverser;
+
+   Containers::Array< int, Devices::Cuda > array_boundary( deviceMeshPointer->template getEntitiesCount< EntityType >() );
+   Containers::Array< int, Devices::Cuda > array_interior( deviceMeshPointer->template getEntitiesCount< EntityType >() );
+   Containers::Array< int, Devices::Cuda > array_all     ( deviceMeshPointer->template getEntitiesCount< EntityType >() );
+
+   array_boundary.setValue( 0 );
+   array_interior.setValue( 0 );
+   array_all     .setValue( 0 );
+
+   traverser.template processBoundaryEntities< TestEntitiesProcessor >( deviceMeshPointer, array_boundary.getView() );
+   traverser.template processInteriorEntities< TestEntitiesProcessor >( deviceMeshPointer, array_interior.getView() );
+   traverser.template processAllEntities     < TestEntitiesProcessor >( deviceMeshPointer, array_all.getView() );
+
+   EXPECT_EQ( array_boundary, host_array_boundary );
+   EXPECT_EQ( array_interior, host_array_interior );
+   EXPECT_EQ( array_all,      host_array_all      );
+}
+
+TEST( MeshTest, RegularMeshOfQuadrilateralsTest )
+{
+   using QuadrilateralMeshEntityType = MeshEntity< TestQuadrilateralMeshConfig, Devices::Host, Topologies::Quadrilateral >;
+   using EdgeMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 1 >::SubentityType;
+   using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType;
+
+   using PointType = typename VertexMeshEntityType::PointType;
+   static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value,
+                  "unexpected PointType" );
+
+   const IndexType xSize( 3 ), ySize( 4 );
+   const RealType width( 1.0 ), height( 1.0 );
+   const RealType hx( width / ( RealType ) xSize ),
+                  hy( height / ( RealType ) ySize );
+   const IndexType numberOfCells = xSize * ySize;
+   const IndexType numberOfVertices = ( xSize + 1 ) * ( ySize + 1 );
+
+   using TestQuadrilateralMesh = Mesh< TestQuadrilateralMeshConfig >;
+   Pointers::SharedPointer< TestQuadrilateralMesh > meshPointer;
+   MeshBuilder< TestQuadrilateralMesh > meshBuilder;
+   meshBuilder.setPointsCount( numberOfVertices );
+   meshBuilder.setCellsCount( numberOfCells );
+
+   /****
+    * Setup vertices
+    */
+   for( IndexType j = 0; j <= ySize; j++ )
+   for( IndexType i = 0; i <= xSize; i++ )
+      meshBuilder.setPoint( j * ( xSize + 1 ) + i, PointType( i * hx, j * hy ) );
+
+   /****
+    * Setup cells
+    */
+   IndexType cellIdx( 0 );
+   for( IndexType j = 0; j < ySize; j++ )
+   for( IndexType i = 0; i < xSize; i++ )
+   {
+      const IndexType vertex0 = j * ( xSize + 1 ) + i;
+      const IndexType vertex1 = j * ( xSize + 1 ) + i + 1;
+      const IndexType vertex2 = ( j + 1 ) * ( xSize + 1 ) + i + 1;
+      const IndexType vertex3 = ( j + 1 ) * ( xSize + 1 ) + i;
+
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 0, vertex0 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 1, vertex1 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 2, vertex2 );
+      meshBuilder.getCellSeed( cellIdx++ ).setCornerId( 3, vertex3 );
+   }
+
+   ASSERT_TRUE( meshBuilder.build( *meshPointer ) );
+
+   // traversers for all test cases
+   Traverser< TestQuadrilateralMesh, QuadrilateralMeshEntityType > traverser_cells;
+   Traverser< TestQuadrilateralMesh, EdgeMeshEntityType > traverser_edges;
+   Traverser< TestQuadrilateralMesh, VertexMeshEntityType > traverser_vertices;
+
+   // arrays for all test cases
+   Containers::Array< int > array_cells_boundary( meshPointer->template getEntitiesCount< 2 >() );
+   Containers::Array< int > array_cells_interior( meshPointer->template getEntitiesCount< 2 >() );
+   Containers::Array< int > array_cells_all     ( meshPointer->template getEntitiesCount< 2 >() );
+
+   Containers::Array< int > array_edges_boundary( meshPointer->template getEntitiesCount< 1 >() );
+   Containers::Array< int > array_edges_interior( meshPointer->template getEntitiesCount< 1 >() );
+   Containers::Array< int > array_edges_all     ( meshPointer->template getEntitiesCount< 1 >() );
+
+   Containers::Array< int > array_vertices_boundary( meshPointer->template getEntitiesCount< 0 >() );
+   Containers::Array< int > array_vertices_interior( meshPointer->template getEntitiesCount< 0 >() );
+   Containers::Array< int > array_vertices_all     ( meshPointer->template getEntitiesCount< 0 >() );
+
+   // reset all arrays
+   array_cells_boundary.setValue( 0 );
+   array_cells_interior.setValue( 0 );
+   array_cells_all     .setValue( 0 );
+
+   array_edges_boundary.setValue( 0 );
+   array_edges_interior.setValue( 0 );
+   array_edges_all     .setValue( 0 );
+
+   array_vertices_boundary.setValue( 0 );
+   array_vertices_interior.setValue( 0 );
+   array_vertices_all     .setValue( 0 );
+
+   // traverse for all test cases
+   traverser_cells.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_cells_boundary.getView() );
+   traverser_cells.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_cells_interior.getView() );
+   traverser_cells.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_cells_all.getView() );
+
+   traverser_edges.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_edges_boundary.getView() );
+   traverser_edges.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_edges_interior.getView() );
+   traverser_edges.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_edges_all.getView() );
+
+   traverser_vertices.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_vertices_boundary.getView() );
+   traverser_vertices.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_vertices_interior.getView() );
+   traverser_vertices.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_vertices_all.getView() );
+
+   // test traversing cells
+   for( IndexType j = 0; j < ySize; j++ )
+   for( IndexType i = 0; i < xSize; i++ )
+   {
+      const IndexType idx = j * xSize + i;
+      if( j == 0 || j == ySize - 1 || i == 0 || i == xSize - 1 ) {
+         EXPECT_EQ( array_cells_boundary[ idx ], 1 );
+         EXPECT_EQ( array_cells_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_cells_boundary[ idx ], 0 );
+         EXPECT_EQ( array_cells_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_cells_all[ idx ], 1 );
+   }
+
+   // test traversing edges
+   // (edges are not numbered systematically, so we just compare with isBoundaryEntity)
+   for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 1 >(); idx++ )
+   {
+      if( meshPointer->template isBoundaryEntity< 1 >( idx ) ) {
+         EXPECT_EQ( array_edges_boundary[ idx ], 1 );
+         EXPECT_EQ( array_edges_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_edges_boundary[ idx ], 0 );
+         EXPECT_EQ( array_edges_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_edges_all[ idx ], 1 );
+   }
+
+   // test traversing vertices
+   for( IndexType j = 0; j <= ySize; j++ )
+   for( IndexType i = 0; i <= xSize; i++ )
+   {
+      const IndexType idx = j * (xSize + 1) + i;
+      if( j == 0 || j == ySize || i == 0 || i == xSize ) {
+         EXPECT_EQ( array_vertices_boundary[ idx ], 1 );
+         EXPECT_EQ( array_vertices_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_vertices_boundary[ idx ], 0 );
+         EXPECT_EQ( array_vertices_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_vertices_all[ idx ], 1 );
+   }
+
+   // test traverser with CUDA
+#ifdef HAVE_CUDA
+   using DeviceMesh = Mesh< TestQuadrilateralMeshConfig, Devices::Cuda >;
+   Pointers::SharedPointer< DeviceMesh > deviceMeshPointer;
+   *deviceMeshPointer = *meshPointer;
+
+   testCudaTraverser< QuadrilateralMeshEntityType >( deviceMeshPointer, array_cells_boundary, array_cells_interior, array_cells_all );
+   testCudaTraverser< EdgeMeshEntityType          >( deviceMeshPointer, array_edges_boundary, array_edges_interior, array_edges_all );
+   testCudaTraverser< VertexMeshEntityType        >( deviceMeshPointer, array_vertices_boundary, array_vertices_interior, array_vertices_all );
+#endif
+}
+
+TEST( MeshTest, RegularMeshOfHexahedronsTest )
+{
+   using HexahedronMeshEntityType = MeshEntity< TestHexahedronMeshConfig, Devices::Host, Topologies::Hexahedron >;
+   using QuadrilateralMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 2 >::SubentityType;
+   using EdgeMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 1 >::SubentityType;
+   using VertexMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 0 >::SubentityType;
+
+   using PointType = typename VertexMeshEntityType::PointType;
+   static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value,
+                  "unexpected PointType" );
+
+   const IndexType xSize( 3 ), ySize( 4 ), zSize( 5 );
+   const RealType width( 1.0 ), height( 1.0 ), depth( 1.0 );
+   const RealType hx( width / ( RealType ) xSize ),
+                  hy( height / ( RealType ) ySize ),
+                  hz( depth / ( RealType ) zSize );
+   const IndexType numberOfCells = xSize * ySize * zSize;
+   const IndexType numberOfVertices = ( xSize + 1 ) * ( ySize + 1 ) * ( zSize + 1 );
+
+   using TestHexahedronMesh = Mesh< TestHexahedronMeshConfig >;
+   Pointers::SharedPointer< TestHexahedronMesh > meshPointer;
+   MeshBuilder< TestHexahedronMesh > meshBuilder;
+   meshBuilder.setPointsCount( numberOfVertices );
+   meshBuilder.setCellsCount( numberOfCells );
+
+   /****
+    * Setup vertices
+    */
+   for( IndexType k = 0; k <= zSize; k++ )
+   for( IndexType j = 0; j <= ySize; j++ )
+   for( IndexType i = 0; i <= xSize; i++ )
+      meshBuilder.setPoint( k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i, PointType( i * hx, j * hy, k * hz ) );
+
+   /****
+    * Setup cells
+    */
+   IndexType cellIdx( 0 );
+   for( IndexType k = 0; k < zSize; k++ )
+   for( IndexType j = 0; j < ySize; j++ )
+   for( IndexType i = 0; i < xSize; i++ )
+   {
+      const IndexType vertex0 = k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i;
+      const IndexType vertex1 = k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i + 1;
+      const IndexType vertex2 = k * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i + 1;
+      const IndexType vertex3 = k * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i;
+      const IndexType vertex4 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i;
+      const IndexType vertex5 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i + 1;
+      const IndexType vertex6 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i + 1;
+      const IndexType vertex7 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i;
+
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 0, vertex0 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 1, vertex1 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 2, vertex2 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 3, vertex3 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 4, vertex4 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 5, vertex5 );
+      meshBuilder.getCellSeed( cellIdx   ).setCornerId( 6, vertex6 );
+      meshBuilder.getCellSeed( cellIdx++ ).setCornerId( 7, vertex7 );
+   }
+
+   ASSERT_TRUE( meshBuilder.build( *meshPointer ) );
+
+   // traversers for all test cases
+   Traverser< TestHexahedronMesh, HexahedronMeshEntityType > traverser_cells;
+   Traverser< TestHexahedronMesh, QuadrilateralMeshEntityType > traverser_faces;
+   Traverser< TestHexahedronMesh, EdgeMeshEntityType > traverser_edges;
+   Traverser< TestHexahedronMesh, VertexMeshEntityType > traverser_vertices;
+
+   // arrays for all test cases
+   Containers::Array< int > array_cells_boundary( meshPointer->template getEntitiesCount< 3 >() );
+   Containers::Array< int > array_cells_interior( meshPointer->template getEntitiesCount< 3 >() );
+   Containers::Array< int > array_cells_all     ( meshPointer->template getEntitiesCount< 3 >() );
+
+   Containers::Array< int > array_faces_boundary( meshPointer->template getEntitiesCount< 2 >() );
+   Containers::Array< int > array_faces_interior( meshPointer->template getEntitiesCount< 2 >() );
+   Containers::Array< int > array_faces_all     ( meshPointer->template getEntitiesCount< 2 >() );
+
+   Containers::Array< int > array_edges_boundary( meshPointer->template getEntitiesCount< 1 >() );
+   Containers::Array< int > array_edges_interior( meshPointer->template getEntitiesCount< 1 >() );
+   Containers::Array< int > array_edges_all     ( meshPointer->template getEntitiesCount< 1 >() );
+
+   Containers::Array< int > array_vertices_boundary( meshPointer->template getEntitiesCount< 0 >() );
+   Containers::Array< int > array_vertices_interior( meshPointer->template getEntitiesCount< 0 >() );
+   Containers::Array< int > array_vertices_all     ( meshPointer->template getEntitiesCount< 0 >() );
+
+   // reset all arrays
+   array_cells_boundary.setValue( 0 );
+   array_cells_interior.setValue( 0 );
+   array_cells_all     .setValue( 0 );
+
+   array_faces_boundary.setValue( 0 );
+   array_faces_interior.setValue( 0 );
+   array_faces_all     .setValue( 0 );
+
+   array_edges_boundary.setValue( 0 );
+   array_edges_interior.setValue( 0 );
+   array_edges_all     .setValue( 0 );
+
+   array_vertices_boundary.setValue( 0 );
+   array_vertices_interior.setValue( 0 );
+   array_vertices_all     .setValue( 0 );
+
+   // traverse for all test cases
+   traverser_cells.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_cells_boundary.getView() );
+   traverser_cells.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_cells_interior.getView() );
+   traverser_cells.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_cells_all.getView() );
+
+   traverser_faces.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_faces_boundary.getView() );
+   traverser_faces.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_faces_interior.getView() );
+   traverser_faces.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_faces_all.getView() );
+
+   traverser_edges.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_edges_boundary.getView() );
+   traverser_edges.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_edges_interior.getView() );
+   traverser_edges.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_edges_all.getView() );
+
+   traverser_vertices.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_vertices_boundary.getView() );
+   traverser_vertices.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_vertices_interior.getView() );
+   traverser_vertices.template processAllEntities     < TestEntitiesProcessor >( meshPointer, array_vertices_all.getView() );
+
+   // test traversing cells
+   for( IndexType k = 0; k < zSize; k++ )
+   for( IndexType j = 0; j < ySize; j++ )
+   for( IndexType i = 0; i < xSize; i++ )
+   {
+      const IndexType idx = k * xSize * ySize + j * xSize + i;
+      if( k == 0 || k == zSize - 1 || j == 0 || j == ySize - 1 || i == 0 || i == xSize - 1 ) {
+         EXPECT_EQ( array_cells_boundary[ idx ], 1 );
+         EXPECT_EQ( array_cells_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_cells_boundary[ idx ], 0 );
+         EXPECT_EQ( array_cells_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_cells_all[ idx ], 1 );
+   }
+
+   // test traversing faces
+   // (faces are not numbered systematically, so we just compare with isBoundaryEntity)
+   for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 2 >(); idx++ )
+   {
+      if( meshPointer->template isBoundaryEntity< 2 >( idx ) ) {
+         EXPECT_EQ( array_faces_boundary[ idx ], 1 );
+         EXPECT_EQ( array_faces_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_faces_boundary[ idx ], 0 );
+         EXPECT_EQ( array_faces_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_faces_all[ idx ], 1 );
+   }
+
+   // test traversing edges
+   // (edges are not numbered systematically, so we just compare with isBoundaryEntity)
+   for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 1 >(); idx++ )
+   {
+      if( meshPointer->template isBoundaryEntity< 1 >( idx ) ) {
+         EXPECT_EQ( array_edges_boundary[ idx ], 1 );
+         EXPECT_EQ( array_edges_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_edges_boundary[ idx ], 0 );
+         EXPECT_EQ( array_edges_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_edges_all[ idx ], 1 );
+   }
+
+   // test traversing vertices
+   for( IndexType k = 0; k <= zSize; k++ )
+   for( IndexType j = 0; j <= ySize; j++ )
+   for( IndexType i = 0; i <= xSize; i++ )
+   {
+      const IndexType idx = k * (xSize + 1) * (ySize + 1) + j * (xSize + 1) + i;
+      if( k == 0 || k == zSize || j == 0 || j == ySize || i == 0 || i == xSize ) {
+         EXPECT_EQ( array_vertices_boundary[ idx ], 1 );
+         EXPECT_EQ( array_vertices_interior[ idx ], 0 );
+      }
+      else {
+         EXPECT_EQ( array_vertices_boundary[ idx ], 0 );
+         EXPECT_EQ( array_vertices_interior[ idx ], 1 );
+      }
+      EXPECT_EQ( array_vertices_all[ idx ], 1 );
+   }
+
+   // test traverser with CUDA
+#ifdef HAVE_CUDA
+   using DeviceMesh = Mesh< TestHexahedronMeshConfig, Devices::Cuda >;
+   Pointers::SharedPointer< DeviceMesh > deviceMeshPointer;
+   *deviceMeshPointer = *meshPointer;
+
+   testCudaTraverser< HexahedronMeshEntityType    >( deviceMeshPointer, array_cells_boundary, array_cells_interior, array_cells_all );
+   testCudaTraverser< QuadrilateralMeshEntityType >( deviceMeshPointer, array_faces_boundary, array_faces_interior, array_faces_all );
+   testCudaTraverser< EdgeMeshEntityType          >( deviceMeshPointer, array_edges_boundary, array_edges_interior, array_edges_all );
+   testCudaTraverser< VertexMeshEntityType        >( deviceMeshPointer, array_vertices_boundary, array_vertices_interior, array_vertices_all );
+#endif
+}
+
+} // namespace MeshTest
+
+#endif
diff --git a/src/UnitTests/Pointers/SharedPointerCudaTest.cu b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
index c0d76b2cc050d074831a4a6065d71b99ea24a7e9..83b6b4793bf6d5e17f6587b71c60261e8b80cea0 100644
--- a/src/UnitTests/Pointers/SharedPointerCudaTest.cu
+++ b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
@@ -55,7 +55,7 @@ TEST( SharedPointerCudaTest, getDataTest )
    ASSERT_EQ( ptr1->y(), 2 );
 #else
 
-   Devices::Cuda::synchronizeDevice();
+   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
 
    TestType aux;
 
@@ -89,7 +89,7 @@ TEST( SharedPointerCudaTest, getDataArrayTest )
    ptr->setElement( 0, 1 );
    ptr->setElement( 1, 2 );
 
-   Devices::Cuda::synchronizeDevice();
+   Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
 
    int *testArray_device, *testArray_host;
    cudaMalloc( ( void** ) &testArray_device, 2 * sizeof( int ) );
diff --git a/src/UnitTests/TypeInfoTest.cpp b/src/UnitTests/TypeInfoTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c47cdffc819c27f08fc62e0b8c01110b888d0bfd
--- /dev/null
+++ b/src/UnitTests/TypeInfoTest.cpp
@@ -0,0 +1,165 @@
+/***************************************************************************
+                          TypeInfoTest.cpp  -  description
+                             -------------------
+    begin                : Aug 20, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/TypeInfo.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+#endif
+
+using namespace TNL;
+
+#ifdef HAVE_GTEST
+
+enum MyEnumType { foo, bar };
+enum class MyEnumClass { foo, bar };
+
+class MyClass {};
+class MyClassWithGetSerializationType
+{
+public:
+   static std::string getSerializationType() { return "SomethingElse"; }
+};
+
+template< typename... >
+class MyClassTemplate {};
+
+class MyPolymorphicBase
+{
+public:
+   virtual ~MyPolymorphicBase() {}
+};
+class MyPolymorphicDerived : public MyPolymorphicBase
+{
+public:
+   virtual ~MyPolymorphicDerived() {}
+};
+
+
+TEST( TypeInfoTest, getType )
+{
+   // non-const variants
+   EXPECT_EQ( getType< void >(), std::string( "void" ) );
+   EXPECT_EQ( getType< bool >(), std::string( "bool" ) );
+
+   EXPECT_EQ( getType< char >(), std::string( "char" ) );
+   EXPECT_EQ( getType< short >(), std::string( "short" ) );
+   EXPECT_EQ( getType< int >(), std::string( "int" ) );
+   EXPECT_EQ( getType< long >(), std::string( "long" ) );
+
+   EXPECT_EQ( getType< unsigned char >(), std::string( "unsigned char" ) );
+   EXPECT_EQ( getType< unsigned short >(), std::string( "unsigned short" ) );
+   EXPECT_EQ( getType< unsigned int >(), std::string( "unsigned int" ) );
+   EXPECT_EQ( getType< unsigned long >(), std::string( "unsigned long" ) );
+
+   EXPECT_EQ( getType< signed char >(), std::string( "signed char" ) );
+
+   EXPECT_EQ( getType< float >(), std::string( "float" ) );
+   EXPECT_EQ( getType< double >(), std::string( "double" ) );
+   EXPECT_EQ( getType< long double >(), std::string( "long double" ) );
+
+   // const variants - top-level cv-qualifiers are ignored
+   EXPECT_EQ( getType< const void >(), std::string( "void" ) );
+   EXPECT_EQ( getType< const bool >(), std::string( "bool" ) );
+
+   EXPECT_EQ( getType< const char >(), std::string( "char" ) );
+   EXPECT_EQ( getType< const short >(), std::string( "short" ) );
+   EXPECT_EQ( getType< const int >(), std::string( "int" ) );
+   EXPECT_EQ( getType< const long >(), std::string( "long" ) );
+
+   EXPECT_EQ( getType< const unsigned char >(), std::string( "unsigned char" ) );
+   EXPECT_EQ( getType< const unsigned short >(), std::string( "unsigned short" ) );
+   EXPECT_EQ( getType< const unsigned int >(), std::string( "unsigned int" ) );
+   EXPECT_EQ( getType< const unsigned long >(), std::string( "unsigned long" ) );
+
+   EXPECT_EQ( getType< const signed char >(), std::string( "signed char" ) );
+
+   EXPECT_EQ( getType< const float >(), std::string( "float" ) );
+   EXPECT_EQ( getType< const double >(), std::string( "double" ) );
+   EXPECT_EQ( getType< const long double >(), std::string( "long double" ) );
+
+   // enum types
+   EXPECT_EQ( getType< MyEnumType >(), std::string( "MyEnumType" ) );
+   EXPECT_EQ( getType< MyEnumClass >(), std::string( "MyEnumClass" ) );
+
+   // classes
+   EXPECT_EQ( getType< MyClass >(), std::string( "MyClass" ) );
+   EXPECT_EQ( getType< MyClassWithGetSerializationType >(), std::string( "MyClassWithGetSerializationType" ) );
+
+   // class templates
+   using T1 = MyClassTemplate< int, MyClassTemplate< int, int >, MyClass >;
+   EXPECT_EQ( getType< T1 >(), std::string( "MyClassTemplate<int, MyClassTemplate<int, int>, MyClass>" ) );
+
+   // polymorphic base
+   MyPolymorphicDerived obj;
+   MyPolymorphicBase* ptr = &obj;
+   // no dynamic cast for pointer types
+   EXPECT_EQ( getType( ptr ), std::string( "MyPolymorphicBase*" ) );
+   // reference to a polymorphic object gets dynamic cast
+   EXPECT_EQ( getType( *ptr ), std::string( "MyPolymorphicDerived" ) );
+}
+
+TEST( TypeInfoTest, getSerializationType )
+{
+   // non-const variants
+   EXPECT_EQ( getSerializationType< void >(), std::string( "void" ) );
+   EXPECT_EQ( getSerializationType< bool >(), std::string( "bool" ) );
+
+   EXPECT_EQ( getSerializationType< char >(), std::string( "char" ) );
+   EXPECT_EQ( getSerializationType< short >(), std::string( "short" ) );
+   EXPECT_EQ( getSerializationType< int >(), std::string( "int" ) );
+   EXPECT_EQ( getSerializationType< long >(), std::string( "long" ) );
+
+   EXPECT_EQ( getSerializationType< unsigned char >(), std::string( "unsigned char" ) );
+   EXPECT_EQ( getSerializationType< unsigned short >(), std::string( "unsigned short" ) );
+   EXPECT_EQ( getSerializationType< unsigned int >(), std::string( "unsigned int" ) );
+   EXPECT_EQ( getSerializationType< unsigned long >(), std::string( "unsigned long" ) );
+
+   EXPECT_EQ( getSerializationType< signed char >(), std::string( "signed char" ) );
+
+   EXPECT_EQ( getSerializationType< float >(), std::string( "float" ) );
+   EXPECT_EQ( getSerializationType< double >(), std::string( "double" ) );
+   EXPECT_EQ( getSerializationType< long double >(), std::string( "long double" ) );
+
+   // const variants - top-level cv-qualifiers are ignored
+   EXPECT_EQ( getSerializationType< const void >(), std::string( "void" ) );
+   EXPECT_EQ( getSerializationType< const bool >(), std::string( "bool" ) );
+
+   EXPECT_EQ( getSerializationType< const char >(), std::string( "char" ) );
+   EXPECT_EQ( getSerializationType< const short >(), std::string( "short" ) );
+   EXPECT_EQ( getSerializationType< const int >(), std::string( "int" ) );
+   EXPECT_EQ( getSerializationType< const long >(), std::string( "long" ) );
+
+   EXPECT_EQ( getSerializationType< const unsigned char >(), std::string( "unsigned char" ) );
+   EXPECT_EQ( getSerializationType< const unsigned short >(), std::string( "unsigned short" ) );
+   EXPECT_EQ( getSerializationType< const unsigned int >(), std::string( "unsigned int" ) );
+   EXPECT_EQ( getSerializationType< const unsigned long >(), std::string( "unsigned long" ) );
+
+   EXPECT_EQ( getSerializationType< const signed char >(), std::string( "signed char" ) );
+
+   EXPECT_EQ( getSerializationType< const float >(), std::string( "float" ) );
+   EXPECT_EQ( getSerializationType< const double >(), std::string( "double" ) );
+   EXPECT_EQ( getSerializationType< const long double >(), std::string( "long double" ) );
+
+   // enum types
+   EXPECT_EQ( getSerializationType< MyEnumType >(), std::string( "MyEnumType" ) );
+   EXPECT_EQ( getSerializationType< MyEnumClass >(), std::string( "MyEnumClass" ) );
+
+   // classes
+   EXPECT_EQ( getSerializationType< MyClass >(), std::string( "MyClass" ) );
+   EXPECT_EQ( getSerializationType< MyClassWithGetSerializationType >(), std::string( "SomethingElse" ) );
+
+   // class templates
+   using T1 = MyClassTemplate< int, MyClassTemplate< int, int >, MyClass >;
+   EXPECT_EQ( getSerializationType< T1 >(), "MyClassTemplate<int, MyClassTemplate<int, int>, MyClass>" );
+}
+#endif
+
+#include "main.h"