diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 50697564b0d47c4327776fa48d1702adf6dd9812..157374d4c7c1224768332810c40103f6aea4646c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,19 +18,36 @@ stages:
     WITH_CUDA: "no"
     WITH_CUDA_ARCH: "auto"
     WITH_MIC: "no"
+    WITH_MPI: "no"
     WITH_TESTS: "yes"
     WITH_COVERAGE: "no"
-    WITH_EXAMPLES: "yes"
+    # these are built only in the "full" config
+    WITH_BENCHMARKS: "no"
+    WITH_EXAMPLES: "no"
+    WITH_TOOLS: "no"
+    WITH_PYTHON: "no"
 
 # template for build jobs
 .build_template_def: &build_template
     stage: build
     script:
-        - export NUM_CORES=$(grep "core id" /proc/cpuinfo | wc -l)
-        - export MAKEFLAGS="-l$(echo 1.5*$NUM_CORES | bc) -j$NUM_CORES"
+        # set MPI compiler wrapper
+        - if [[ ${WITH_MPI} == "yes" ]]; then
+                export CXX=mpicxx;
+                export CC=mpicc;
+          fi
+        # all cores including hyperthreading
+#        - export NUM_CORES=$(grep "core id" /proc/cpuinfo | wc -l)
+#       # all physical cores
+        - export NUM_CORES=$(grep "core id" /proc/cpuinfo | sort -u | wc -l)
+        # ninja does not have -l
+#        - export MAKEFLAGS="-l$(echo 1.5*$NUM_CORES | bc) -j$NUM_CORES"
+        - export NINJAFLAGS="-j$NUM_CORES"
+        - export CTEST_OUTPUT_ON_FAILURE=1
         - mkdir -p "./builddir/$CI_JOB_NAME"
         - pushd "./builddir/$CI_JOB_NAME"
         - cmake ../..
+                -G Ninja
                 -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
                 -DCMAKE_INSTALL_PREFIX=$(pwd)/${BUILD_TYPE}_install_prefix
                 -DWITH_OPENMP=${WITH_OPENMP}
@@ -39,10 +56,16 @@ stages:
                 -DWITH_MIC=${WITH_MIC}
                 -DWITH_TESTS=${WITH_TESTS}
                 -DWITH_COVERAGE=${WITH_COVERAGE}
+                -DWITH_BENCHMARKS=${WITH_BENCHMARKS}
                 -DWITH_EXAMPLES=${WITH_EXAMPLES}
-        - make
-        - make test CTEST_OUTPUT_ON_FAILURE=1
-        - make install
+                -DWITH_TOOLS=${WITH_TOOLS}
+                -DWITH_PYTHON=${WITH_PYTHON}
+#        - make
+#        - make test
+#        - make install
+        - ninja ${NINJAFLAGS}
+        - ninja test
+        - ninja install
         - popd
     variables:
         <<: *default_cmake_flags
@@ -51,7 +74,7 @@ stages:
 # Cuda builds are specified first because they take more time than host-only builds,
 # which can be allocated on hosts whitout GPUs.
 
-cuda_Debug:
+cuda_base_Debug:
     <<: *build_template
     tags:
         - gpu
@@ -60,7 +83,7 @@ cuda_Debug:
         WITH_CUDA: "yes"
         BUILD_TYPE: Debug
 
-cuda_Release:
+cuda_base_Release:
     <<: *build_template
     tags:
         - gpu
@@ -69,38 +92,94 @@ cuda_Release:
         WITH_CUDA: "yes"
         BUILD_TYPE: Release
 
-cuda+openmp_Debug:
+cuda_mpi_Debug:
     <<: *build_template
     tags:
         - openmp
         - gpu
+        - mpi
     variables:
         <<: *default_cmake_flags
         WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
+        WITH_MPI: "yes"
         BUILD_TYPE: Debug
 
-cuda+openmp_Release:
+cuda_mpi_Release:
     <<: *build_template
     tags:
         - openmp
         - gpu
+        - mpi
     variables:
         <<: *default_cmake_flags
         WITH_OPENMP: "yes"
         WITH_CUDA: "yes"
+        WITH_MPI: "yes"
         BUILD_TYPE: Release
 
-default_Debug:
+cuda_full_Debug:
     <<: *build_template
+    tags:
+        - openmp
+        - gpu
+    variables:
+        <<: *default_cmake_flags
+        WITH_OPENMP: "yes"
+        WITH_CUDA: "yes"
+        BUILD_TYPE: Debug
+        WITH_BENCHMARKS: "yes"
+        WITH_EXAMPLES: "yes"
+        WITH_TOOLS: "yes"
+        WITH_PYTHON: "yes"
+
+cuda_full_Release:
+    <<: *build_template
+    tags:
+        - openmp
+        - gpu
+    variables:
+        <<: *default_cmake_flags
+        WITH_OPENMP: "yes"
+        WITH_CUDA: "yes"
+        BUILD_TYPE: Release
+        WITH_BENCHMARKS: "yes"
+        WITH_EXAMPLES: "yes"
+        WITH_TOOLS: "yes"
+        WITH_PYTHON: "yes"
 
-default_Release:
+default_base_Debug:
     <<: *build_template
+
+default_base_Release:
+    <<: *build_template
+    variables:
+        <<: *default_cmake_flags
+        BUILD_TYPE: Release
+
+default_mpi_Debug:
+    <<: *build_template
+    tags:
+        - openmp
+        - mpi
     variables:
         <<: *default_cmake_flags
+        WITH_OPENMP: "yes"
+        WITH_MPI: "yes"
+        BUILD_TYPE: Debug
+
+default_mpi_Release:
+    <<: *build_template
+    tags:
+        - openmp
+        - mpi
+    variables:
+        <<: *default_cmake_flags
+        WITH_OPENMP: "yes"
+        WITH_MPI: "yes"
         BUILD_TYPE: Release
 
-openmp_Debug:
+default_full_Debug:
     <<: *build_template
     tags:
         - openmp
@@ -108,8 +187,12 @@ openmp_Debug:
         <<: *default_cmake_flags
         WITH_OPENMP: "yes"
         BUILD_TYPE: Debug
+        WITH_BENCHMARKS: "yes"
+        WITH_EXAMPLES: "yes"
+        WITH_TOOLS: "yes"
+        WITH_PYTHON: "yes"
 
-openmp_Release:
+default_full_Release:
     <<: *build_template
     tags:
         - openmp
@@ -117,3 +200,7 @@ openmp_Release:
         <<: *default_cmake_flags
         WITH_OPENMP: "yes"
         BUILD_TYPE: Release
+        WITH_BENCHMARKS: "yes"
+        WITH_EXAMPLES: "yes"
+        WITH_TOOLS: "yes"
+        WITH_PYTHON: "yes"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0b812d80bcf8fa36dbe66678ffffb9a5e03a14c..82ae1a00819e154db93c1b4b1d139801ac3f897d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ option(WITH_TESTS "Build tests" ON)
 option(WITH_COVERAGE "Enable code coverage reports from unit tests" OFF)
 option(WITH_EXAMPLES "Compile the 'examples' directory" ON)
 option(WITH_TOOLS "Compile the 'src/Tools' directory" ON)
+option(WITH_BENCHMARKS "Compile the 'src/Benchmarks' directory" ON)
 option(WITH_PYTHON "Compile the Python bindings" ON)
 option(WITH_TEMPLATES_INSTANTIATION "Enable explicit template instantiation" OFF)
 
@@ -62,6 +63,20 @@ else()
     set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/bin )
 endif()
 
+# check if the compiler is good enough
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+   # GCC 5.0 is the first release with full C++11 support (due to libstdc++)
+   # https://gcc.gnu.org/gcc-5/changes.html
+   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+      message(FATAL_ERROR "Insufficient GCC version")
+   endif()
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+   # Clang 3.4 has full C++14 support: http://clang.llvm.org/cxx_status.html
+   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.4")
+      message(FATAL_ERROR "Insufficient Clang version")
+   endif()
+endif()
+
 # set Debug/Release options
 set( CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" )
 set( CMAKE_CXX_FLAGS_DEBUG "-g -rdynamic -ftemplate-backtrace-limit=0" )
@@ -97,8 +112,8 @@ if( CXX_COMPILER_NAME MATCHES "icpc" )
 endif()
 
 # force colorized output in continuous integration
-if( DEFINED ENV{CI_JOB_NAME} )
-   message(STATUS "Continuous integration detected -- forcing compilers to produce colorized output.")
+if( DEFINED ENV{CI_JOB_NAME} OR CMAKE_GENERATOR STREQUAL "Ninja" ) # if() dereferences the variable name itself
+   message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
    if( CXX_COMPILER_NAME MATCHES "clang" )
       set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics" )
    else()
@@ -109,10 +124,11 @@ endif()
 #####
 # Check for MPI -- poznej podle vraperu compileru -- da se testovat preklad bez MPI
 #
-if( ${CXX_COMPILER_NAME} STREQUAL "mpic++" )
+if( ${CXX_COMPILER_NAME} STREQUAL "mpicxx" )
    message( "MPI compiler detected."    )
    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_MPI" )
-   set( CUDA_HOST_COMPILER "mpic++" )
+   set( CUDA_HOST_COMPILER "mpicxx" )
+   set( BUILD_MPI ON )
 endif()
 
 ####
@@ -150,7 +166,7 @@ if( ${WITH_CUDA} )
             endif()
         endif()
         # An extra CUDA_ARCH_HOST_COMPILER variable for compiling tnl-cuda-arch alone,
-        # because it SHOULD NOT be compiled using mpic++, which would cause weird
+        # because it SHOULD NOT be compiled using mpicxx, which would cause weird
         # RPATH_CHANGE error in cmake.
         # FIXME: find better solution to switch between MPI-enabled and MPI-disabled binaries in cmake
         if( NOT $ENV{CUDA_ARCH_HOST_COMPILER} STREQUAL "" )
@@ -405,15 +421,12 @@ INCLUDE_DIRECTORIES( src )
 INCLUDE_DIRECTORIES( ${PROJECT_BUILD_PATH} )
 LINK_DIRECTORIES( ${LIBRARY_OUTPUT_PATH} )
 
-#Pokracujeme dalsimi podadresari
+# Add all subdirectories
+if( ${WITH_TESTS} )
+    add_subdirectory( tests )
+endif()
 add_subdirectory( src )
 add_subdirectory( share )
-if( WITH_TESTS STREQUAL "yes" )
-    add_subdirectory( tests )
-endif( WITH_TESTS STREQUAL "yes" )
-if( WITH_EXAMPLES STREQUAL "yes" )
-   add_subdirectory( examples )
-endif( WITH_EXAMPLES STREQUAL "yes" )
 
 set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Template Numerical Library")
 set(CPACK_PACKAGE_VENDOR "MMG")
@@ -455,6 +468,7 @@ message( "   WITH_TESTS=${WITH_TESTS}" )
 message( "   WITH_COVERAGE=${WITH_COVERAGE}" )
 message( "   WITH_EXAMPLES=${WITH_EXAMPLES}" )
 message( "   WITH_TOOLS=${WITH_TOOLS}" )
+message( "   WITH_BENCHMARKS=${WITH_BENCHMARKS}" )
 message( "   WITH_PYTHON=${WITH_PYTHON}" )
 message( "   WITH_TEMPLATES_INSTANTIATION=${WITH_TEMPLATES_INSTANTIATION}" )
 # Print compiler options
diff --git a/build b/build
index 40bfa6d4f501dcbbf5415ed22efa475c9f966a2b..e0c8dbb993e592c420aa62abd991c189fbff4870 100755
--- a/build
+++ b/build
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+set -e
+
 TARGET=TNL
 PREFIX=${HOME}/.local
 INSTALL="no"
@@ -24,6 +26,7 @@ WITH_COVERAGE="no"
 WITH_EXAMPLES="yes"
 WITH_PYTHON="yes"
 WITH_TOOLS="yes"
+WITH_BENCHMARKS="yes"
 
 WITH_TEMPLATE_INSTANTIATION="no"
 INSTANTIATE_LONG_INT="no"
@@ -45,8 +48,8 @@ do
         --cmake=*                        ) CMAKE="${option#*=}" ;;
         --cmake-only=*                   ) CMAKE_ONLY="${option#*=}" ;;
         --verbose                        ) VERBOSE="VERBOSE=1" ;;
-        --offline-build                  ) OFFLINE_BUILD="yes" ;;
         --help                           ) HELP="yes" ;;
+        --offline-build                  ) OFFLINE_BUILD="yes" ;;
         --with-clang=*                   ) WITH_CLANG="${option#*=}" ;;
         --with-mpi=*                     ) WITH_MPI="${option#*=}" ;;
         --with-mic=*                     ) WITH_MIC="${option#*=}" ;;
@@ -58,6 +61,7 @@ do
         --with-coverage=*                ) WITH_COVERAGE="${option#*=}" ;;
         --with-examples=*                ) WITH_EXAMPLES="${option#*=}" ;;
         --with-tools=*                   ) WITH_TOOLS="${option#*=}" ;;
+        --with-benchmarks=*              ) WITH_BENCHMARKS="${option#*=}" ;;
         --with-python=*                  ) WITH_PYTHON="${option#*=}" ;;
         --with-templates-instantiation=* ) WITH_TEMPLATE_INSTANTIATION="${option#*=}" ;;
         --instantiate-long-int=*         ) INSTANTIATE_LONG_INT="${option#*=}" ;;
@@ -78,15 +82,15 @@ do
     esac
 done
 
-if test ${HELP} = "yes";
-then
+if [[ ${HELP} == "yes" ]]; then
     echo "TNL build options:"
     echo ""
     echo "   --build=Debug/Release                 Build type."
     echo "   --build-jobs=NUM                      Number of processes to be used for the build. It is set to the number of available CPU cores by default."
     echo "   --prefix=PATH                         Prefix for the installation directory. ${HOME}/local by default."
     echo "   --install=yes/no                      Enables the installation of TNL files."
-    echo "   --with-mpi=yes/no                     Enables MPI. 'no' by default (Intel Compiler required)."
+    echo "   --offline-build                       Disables online updates during the build. Takes no value; 'no' by default."
+    echo "   --with-mpi=yes/no                     Enables MPI. 'yes' by default (OpenMPI required)."
     echo "   --with-mic=yes/no                     Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)."
     echo "   --with-cuda=yes/no                    Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
     echo "   --with-cuda-arch=all/auto/30/35/...   Chooses CUDA architecture. 'auto' by default."
@@ -106,77 +110,113 @@ then
     exit 1
 fi
 
-if test ${WITH_CLANG} = "yes";
-then
+if [[ ${WITH_CLANG} == "yes" ]]; then
    export CXX=clang++
    export CC=clang
 fi
 
-if test ${WITH_MPI} = "yes";
-then
-    if ! [ -x  "$(command -v mpic++)" ]; then
-       echo "Warning:mpic++ is not installed on this system. MPI support is turned off." 
+if [[ ${WITH_MPI} == "yes" ]]; then
+    # NOTE: OpenMPI provides mpic++, but Intel MPI does not
+    if [[ ! -x  "$(command -v mpicxx)" ]]; then
+       echo "Warning: mpicxx is not installed on this system. MPI support is turned off."
     else
-       export CXX=mpic++
-       export CUDA_HOST_COMPILER=mpic++
+       # instruct OpenMPI to use the original compiler
+       # reference: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
+       # FIXME: this does not work with CUDA_HOST_COMPILER=mpicxx
+#       if [ -n "$CXX" ]; then
+#          export OMPI_CXX="$CXX"
+#       fi
+       export CXX=mpicxx
+       export CUDA_HOST_COMPILER=mpicxx
     fi
-    if ! [ -x  "$(command -v mpicc)" ]; then
+    if [[ ! -x  "$(command -v mpicc)" ]]; then
        echo "Warning: mpicc is not installed on this system." 
     else
+       # instruct OpenMPI to use the original compiler
+       # reference: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
+#       if [ -n "$CC" ]; then
+#          export OMPI_CC="$CC"
+#       fi
        export CC=mpicc
     fi
 fi
 
+if hash ninja 2>/dev/null; then
+   generator=Ninja
+   make=ninja
+   check_file="build.ninja"
+else
+   generator="Unix Makefiles"
+   make=make
+   check_file="Makefile"
+fi
 
-echo "Configuring ${BUILD} $TARGET ..."
-
-${CMAKE} ${ROOT_DIR} \
-         -DCMAKE_BUILD_TYPE=${BUILD} \
-         -DCMAKE_INSTALL_PREFIX=${PREFIX} \
-         -DOFFLINE_BUILD=${OFFLINE_BUILD} \
-         -DWITH_MIC=${WITH_MIC} \
-         -DWITH_CUDA=${WITH_CUDA} \
-         -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} \
-         -DWITH_OPENMP=${WITH_OPENMP} \
-         -DWITH_GMP=${WITH_GMP} \
-         -DWITH_TESTS=${WITH_TESTS} \
-         -DWITH_COVERAGE=${WITH_COVERAGE} \
-         -DWITH_EXAMPLES=${WITH_EXAMPLES} \
-         -DWITH_TOOLS=${WITH_TOOLS} \
-         -DWITH_PYTHON=${WITH_PYTHON} \
-         -DDCMTK_DIR=${DCMTK_DIR} \
-         -DWITH_TEMPLATE_INSTANTIATION=${WITH_TEMPLATE_INSTANTIATION} \
-         -DINSTANTIATE_FLOAT=${INSTANTIATE_FLOAT} \
-         -DINSTANTIATE_DOUBLE=${INSTANTIATE_DOUBLE} \
-         -DINSTANTIATE_LONG_DOUBLE=${INSTANTIATE_LONG_DOUBLE} \
-         -DINSTANTIATE_INT=${INSTANTIATE_INT} \
-         -DINSTANTIATE_LONG_INT=${INSTANTIATE_LONG_INT} \
+cmake_command=(
+   ${CMAKE} ${ROOT_DIR}
+         -G "${generator}"
+         -DCMAKE_BUILD_TYPE=${BUILD}
+         -DCMAKE_INSTALL_PREFIX=${PREFIX}
+         -DOFFLINE_BUILD=${OFFLINE_BUILD}
+         -DWITH_MIC=${WITH_MIC}
+         -DWITH_CUDA=${WITH_CUDA}
+         -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
+         -DWITH_OPENMP=${WITH_OPENMP}
+         -DWITH_GMP=${WITH_GMP}
+         -DWITH_TESTS=${WITH_TESTS}
+         -DWITH_COVERAGE=${WITH_COVERAGE}
+         -DWITH_EXAMPLES=${WITH_EXAMPLES}
+         -DWITH_TOOLS=${WITH_TOOLS}
+         -DWITH_BENCHMARKS=${WITH_BENCHMARKS}
+         -DWITH_PYTHON=${WITH_PYTHON}
+         -DDCMTK_DIR=${DCMTK_DIR}
+         -DWITH_TEMPLATE_INSTANTIATION=${WITH_TEMPLATE_INSTANTIATION}
+         -DINSTANTIATE_FLOAT=${INSTANTIATE_FLOAT}
+         -DINSTANTIATE_DOUBLE=${INSTANTIATE_DOUBLE}
+         -DINSTANTIATE_LONG_DOUBLE=${INSTANTIATE_LONG_DOUBLE}
+         -DINSTANTIATE_INT=${INSTANTIATE_INT}
+         -DINSTANTIATE_LONG_INT=${INSTANTIATE_LONG_INT}
          -DOPTIMIZED_VECTOR_HOST_OPERATIONS=${OPTIMIZED_VECTOR_HOST_OPERATIONS}
+)
 
-if test $? != 0; then
-    echo "Error: cmake exited with error code."
-    exit 1
+# Skip running cmake if it was already run and the cmake command is the same.
+# The build system (e.g. make) will call it automatically if necessary (e.g.
+# when some CMakeLists.txt changes).
+if [[ -f ".cmake_command" ]]; then
+   last_cmake_command=$(cat ".cmake_command" 2>/dev/null)
+else
+   last_cmake_command=""
+fi
+if [[ ! -f "$check_file" ]] || [[ "$last_cmake_command" != "${cmake_command[*]}" ]]; then  # [*]: compare as one space-joined string
+   echo "Configuring ${BUILD} $TARGET ..."
+   "${cmake_command[@]}"
+   echo -n "${cmake_command[*]}" > ".cmake_command"  # remember the command line for the next run
 fi
 
-if test ${CMAKE_ONLY} = "yes";
-then
-    exit 0
+if [[ ${CMAKE_ONLY} == "yes" ]]; then
+   exit 0
 fi
 
-if [[ -n ${BUILD_JOBS} ]]; then
-    # override $MAKEFLAGS from parent environment
-    export MAKEFLAGS=-j${BUILD_JOBS}
-elif [[ -z ${MAKEFLAGS} ]]; then
-    # $BUILD_JOBS and $MAKEFLAGS are not set => set default value
-    BUILD_JOBS=$(grep "core id" /proc/cpuinfo | sort -u | wc -l)
-    export MAKEFLAGS=-j${BUILD_JOBS}
+if [[ "$make" == "make" ]]; then
+   if [[ -n ${BUILD_JOBS} ]]; then
+      # override $MAKEFLAGS from parent environment
+      export MAKEFLAGS=-j${BUILD_JOBS}
+   elif [[ -z ${MAKEFLAGS} ]]; then
+      # $BUILD_JOBS and $MAKEFLAGS are not set => set default value
+      BUILD_JOBS=$(grep "core id" /proc/cpuinfo | sort -u | wc -l)
+      export MAKEFLAGS=-j${BUILD_JOBS}
+   fi
+else
+   # ninja takes the job count as -jN and verbosity as -v; make's VERBOSE=1 would be parsed as a target name
+   [[ -n ${BUILD_JOBS} ]] || BUILD_JOBS=$(grep "core id" /proc/cpuinfo | sort -u | wc -l)
+   make="$make -j$BUILD_JOBS"
+   VERBOSE=${VERBOSE:+-v}
+fi
 
 if [[ -n ${BUILD_JOBS} ]]; then
-    echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..."
+   echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..."
 else
-    # number of processors is unknown - it is encoded in $MAKEFLAGS from parent environment
-    echo "Building ${BUILD} $TARGET ..."
+   # number of processors is unknown - it is encoded in $MAKEFLAGS from parent environment
+   echo "Building ${BUILD} $TARGET ..."
 fi
 
 if [[ "$INSTALL" == "yes" ]]; then
@@ -186,19 +226,8 @@ else
    make_target="all"
 fi
 
-make ${VERBOSE} $make_target
-if test $? != 0; then
-    echo "Error: Build process failed."
-    exit 1
-fi
+$make ${VERBOSE} $make_target
 
-
-if test ${WITH_TESTS} = "yes";
-then
-    make test CTEST_OUTPUT_ON_FAILURE=1
-    if test $? != 0; then
-        echo "Error: Some test did not pass successfuly."
-    fi
+if [[ ${WITH_TESTS} == "yes" ]]; then
+   CTEST_OUTPUT_ON_FAILURE=1 $make test
 fi
-
-exit 0
diff --git a/examples/inviscid-flow-vl/CompressibleConservativeVariables.h b/examples/inviscid-flow-vl/CompressibleConservativeVariables.h
deleted file mode 100644
index a3afc845366f8df17b41c5affc5a4e49d5da052a..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow-vl/CompressibleConservativeVariables.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/***************************************************************************
-                          CompressibleConservativeVariables.h  -  description
-                             -------------------
-    begin                : Feb 12, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-
-#pragma once
-
-#include <TNL/Functions/MeshFunction.h>
-#include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
-
-namespace TNL {
-
-template< typename Mesh >
-class CompressibleConservativeVariables
-{
-   public:
-      typedef Mesh MeshType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      typedef typename MeshType::RealType RealType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::IndexType IndexType;
-      typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshType > MeshPointer;      
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > MomentumFieldPointer;
-      
-      CompressibleConservativeVariables(){};
-      
-      CompressibleConservativeVariables( const MeshPointer& meshPointer )
-      : density( meshPointer ),
-        momentum( meshPointer ),
-        //pressure( meshPointer ),
-        energy( meshPointer ){};
-        
-      void setMesh( const MeshPointer& meshPointer )
-      {
-         this->density->setMesh( meshPointer );
-         this->momentum->setMesh( meshPointer );
-         //this->pressure.setMesh( meshPointer );
-         this->energy->setMesh( meshPointer );
-      }
-      
-      template< typename Vector >
-      void bind( const MeshPointer& meshPointer,
-                 const Vector& data,
-                 IndexType offset = 0 )
-      {
-         IndexType currentOffset( offset );
-         this->density->bind( meshPointer, data, currentOffset );
-         currentOffset += this->density->getDofs( meshPointer );
-         for( IndexType i = 0; i < Dimensions; i++ )
-         {
-            ( *this->momentum )[ i ]->bind( meshPointer, data, currentOffset );
-            currentOffset += ( *this->momentum )[ i ]->getDofs( meshPointer );
-         }
-         this->energy->bind( meshPointer, data, currentOffset );
-      }
-      
-      IndexType getDofs( const MeshPointer& meshPointer ) const
-      {
-         return this->density->getDofs( meshPointer ) + 
-            this->momentum->getDofs( meshPointer ) +
-            this->energy->getDofs( meshPointer );
-      }
-      
-      MeshFunctionPointer& getDensity()
-      {
-         return this->density;
-      }
-
-      const MeshFunctionPointer& getDensity() const
-      {
-         return this->density;
-      }
-      
-      void setDensity( MeshFunctionPointer& density )
-      {
-         this->density = density;
-      }
-      
-      MomentumFieldPointer& getMomentum()
-      {
-         return this->momentum;
-      }
-      
-      const MomentumFieldPointer& getMomentum() const
-      {
-         return this->momentum;
-      }
-      
-      void setMomentum( MomentumFieldPointer& momentum )
-      {
-         this->momentum = momentum;
-      }
-      
-      /*MeshFunctionPointer& getPressure()
-      {
-         return this->pressure;
-      }
-      
-      const MeshFunctionPointer& getPressure() const
-      {
-         return this->pressure;
-      }
-      
-      void setPressure( MeshFunctionPointer& pressure )
-      {
-         this->pressure = pressure;
-      }*/
-      
-      MeshFunctionPointer& getEnergy()
-      {
-         return this->energy;
-      }
-      
-      const MeshFunctionPointer& getEnergy() const
-      {
-         return this->energy;
-      }
-      
-      void setEnergy( MeshFunctionPointer& energy )
-      {
-         this->energy = energy;
-      }
-      
-      void getVelocityField( VelocityFieldType& velocityField )
-      {
-         
-      }
-
-   protected:
-      
-      MeshFunctionPointer density;
-      MomentumFieldPointer momentum;
-      MeshFunctionPointer energy;
-      
-};
-
-} // namespace TN
\ No newline at end of file
diff --git a/examples/inviscid-flow-vl/PhysicalVariablesGetter.h b/examples/inviscid-flow-vl/PhysicalVariablesGetter.h
deleted file mode 100644
index f1ba6bd1222b8653faeaac041606c101a071e188..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow-vl/PhysicalVariablesGetter.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/***************************************************************************
-                          CompressibleConservativeVariables.h  -  description
-                             -------------------
-    begin                : Feb 12, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/SharedPointer.h>
-#include <TNL/Functions/MeshFunction.h>
-#include <TNL/Functions/VectorField.h>
-#include <TNL/Functions/MeshFunctionEvaluator.h>
-#include "CompressibleConservativeVariables.h"
-
-namespace TNL {
-   
-template< typename Mesh >
-class PhysicalVariablesGetter
-{
-   public:
-      
-      typedef Mesh MeshType;
-      typedef typename MeshType::RealType RealType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::IndexType IndexType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      
-      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      
-      class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
-      {
-         public:
-            typedef typename MeshType::RealType RealType;
-            
-            VelocityGetter( MeshFunctionPointer density, 
-                            MeshFunctionPointer momentum )
-            : density( density ), momentum( momentum ) {}
-            
-            template< typename EntityType >
-            __cuda_callable__
-            RealType operator()( const EntityType& meshEntity,
-                                        const RealType& time = 0.0 ) const
-            {
-               if( density.template getData< DeviceType >()( meshEntity ) == 0.0 )
-                  return 0;
-               else
-                  return momentum.template getData< DeviceType >()( meshEntity ) / 
-                         density.template getData< DeviceType >()( meshEntity );
-            }
-            
-         protected:
-            const MeshFunctionPointer density, momentum;
-      };
-      
-      class PressureGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
-      {
-         public:
-            typedef typename MeshType::RealType RealType;
-            
-            PressureGetter( MeshFunctionPointer density,
-                            MeshFunctionPointer energy, 
-                            VelocityFieldPointer momentum,
-                            const RealType& gamma )
-            : density( density ), energy( energy ), momentum( momentum ), gamma( gamma ) {}
-            
-            template< typename EntityType >
-            __cuda_callable__
-            RealType operator()( const EntityType& meshEntity,
-                                 const RealType& time = 0.0 ) const
-            {
-               const RealType e = energy.template getData< DeviceType >()( meshEntity );
-               const RealType rho = density.template getData< DeviceType >()( meshEntity );
-               const RealType momentumNorm = momentum.template getData< DeviceType >().getVector( meshEntity ).lpNorm( 2.0 );
-               if( rho == 0.0 )
-                  return 0;
-               else
-                  return ( gamma - 1.0 ) * ( e - 0.5 * momentumNorm * momentumNorm / rho );
-            }
-            
-         protected:
-            const MeshFunctionPointer density, energy;
-            const VelocityFieldPointer momentum;
-            const RealType gamma;
-      };      
-
-      
-      void getVelocity( const ConservativeVariablesPointer& conservativeVariables,
-                        VelocityFieldPointer& velocity )
-      {
-         Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
-         for( int i = 0; i < Dimensions; i++ )
-         {
-            SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
-                                                                        ( *conservativeVariables->getMomentum() )[ i ] );
-            evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
-         }
-      }
-      
-      void getPressure( const ConservativeVariablesPointer& conservativeVariables,
-                        const RealType& gamma,
-                        MeshFunctionPointer& pressure )
-      {
-         Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
-         SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
-                                                                     conservativeVariables->getEnergy(),
-                                                                     conservativeVariables->getMomentum(),
-                                                                     gamma );
-         evaluator.evaluate( pressure, pressureGetter );
-      }
-      
-};
-   
-} //namespace TNL
diff --git a/examples/inviscid-flow/CompressibleConservativeVariables.h b/examples/inviscid-flow/CompressibleConservativeVariables.h
deleted file mode 100644
index a3afc845366f8df17b41c5affc5a4e49d5da052a..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow/CompressibleConservativeVariables.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/***************************************************************************
-                          CompressibleConservativeVariables.h  -  description
-                             -------------------
-    begin                : Feb 12, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-
-#pragma once
-
-#include <TNL/Functions/MeshFunction.h>
-#include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
-
-namespace TNL {
-
-template< typename Mesh >
-class CompressibleConservativeVariables
-{
-   public:
-      typedef Mesh MeshType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      typedef typename MeshType::RealType RealType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::IndexType IndexType;
-      typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshType > MeshPointer;      
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > MomentumFieldPointer;
-      
-      CompressibleConservativeVariables(){};
-      
-      CompressibleConservativeVariables( const MeshPointer& meshPointer )
-      : density( meshPointer ),
-        momentum( meshPointer ),
-        //pressure( meshPointer ),
-        energy( meshPointer ){};
-        
-      void setMesh( const MeshPointer& meshPointer )
-      {
-         this->density->setMesh( meshPointer );
-         this->momentum->setMesh( meshPointer );
-         //this->pressure.setMesh( meshPointer );
-         this->energy->setMesh( meshPointer );
-      }
-      
-      template< typename Vector >
-      void bind( const MeshPointer& meshPointer,
-                 const Vector& data,
-                 IndexType offset = 0 )
-      {
-         IndexType currentOffset( offset );
-         this->density->bind( meshPointer, data, currentOffset );
-         currentOffset += this->density->getDofs( meshPointer );
-         for( IndexType i = 0; i < Dimensions; i++ )
-         {
-            ( *this->momentum )[ i ]->bind( meshPointer, data, currentOffset );
-            currentOffset += ( *this->momentum )[ i ]->getDofs( meshPointer );
-         }
-         this->energy->bind( meshPointer, data, currentOffset );
-      }
-      
-      IndexType getDofs( const MeshPointer& meshPointer ) const
-      {
-         return this->density->getDofs( meshPointer ) + 
-            this->momentum->getDofs( meshPointer ) +
-            this->energy->getDofs( meshPointer );
-      }
-      
-      MeshFunctionPointer& getDensity()
-      {
-         return this->density;
-      }
-
-      const MeshFunctionPointer& getDensity() const
-      {
-         return this->density;
-      }
-      
-      void setDensity( MeshFunctionPointer& density )
-      {
-         this->density = density;
-      }
-      
-      MomentumFieldPointer& getMomentum()
-      {
-         return this->momentum;
-      }
-      
-      const MomentumFieldPointer& getMomentum() const
-      {
-         return this->momentum;
-      }
-      
-      void setMomentum( MomentumFieldPointer& momentum )
-      {
-         this->momentum = momentum;
-      }
-      
-      /*MeshFunctionPointer& getPressure()
-      {
-         return this->pressure;
-      }
-      
-      const MeshFunctionPointer& getPressure() const
-      {
-         return this->pressure;
-      }
-      
-      void setPressure( MeshFunctionPointer& pressure )
-      {
-         this->pressure = pressure;
-      }*/
-      
-      MeshFunctionPointer& getEnergy()
-      {
-         return this->energy;
-      }
-      
-      const MeshFunctionPointer& getEnergy() const
-      {
-         return this->energy;
-      }
-      
-      void setEnergy( MeshFunctionPointer& energy )
-      {
-         this->energy = energy;
-      }
-      
-      void getVelocityField( VelocityFieldType& velocityField )
-      {
-         
-      }
-
-   protected:
-      
-      MeshFunctionPointer density;
-      MomentumFieldPointer momentum;
-      MeshFunctionPointer energy;
-      
-};
-
-} // namespace TN
\ No newline at end of file
diff --git a/examples/inviscid-flow/LaxFridrichs.h b/examples/inviscid-flow/LaxFridrichs.h
deleted file mode 100644
index cdf32899f69eb797a6d9a18a52b84c09709867bf..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow/LaxFridrichs.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/***************************************************************************
-                          LaxFridrichs.h  -  description
-                             -------------------
-    begin                : Feb 18, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-
-#pragma once
-
-#include <TNL/Containers/Vector.h>
-#include <TNL/Meshes/Grid.h>
-#include <TNL/Functions/VectorField.h>
-
-#include "LaxFridrichsContinuity.h"
-#include "LaxFridrichsEnergy.h"
-#include "LaxFridrichsMomentumX.h"
-#include "LaxFridrichsMomentumY.h"
-#include "LaxFridrichsMomentumZ.h"
-
-namespace TNL {
-
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class LaxFridrichs
-{
-   public:
-      typedef Mesh MeshType;
-      typedef Real RealType;
-      typedef typename Mesh::DeviceType DeviceType;
-      typedef Index IndexType;
-      typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      static const int Dimensions = Mesh::getMeshDimension();
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VectorFieldType;
- 
-      typedef LaxFridrichsContinuity< Mesh, Real, Index > ContinuityOperatorType;
-      typedef LaxFridrichsMomentumX< Mesh, Real, Index > MomentumXOperatorType;
-      typedef LaxFridrichsMomentumY< Mesh, Real, Index > MomentumYOperatorType;
-      typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
-      typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
-
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
-
-      static void configSetup( Config::ConfigDescription& config,
-                               const String& prefix = "" )
-      {
-         config.addEntry< double >( prefix + "numerical-viscosity", "Value of artificial (numerical) viscosity in the Lax-Fridrichs scheme", 1.0 );
-      }
-      
-      LaxFridrichs()
-         : artificialViscosity( 1.0 ) {}
-      
-      bool setup( const MeshPointer& meshPointer,
-                  const Config::ParameterContainer& parameters,
-                  const String& prefix = "" )
-      {
-         this->artificialViscosity = parameters.getParameter< double >( prefix + "numerical-viscosity" );
-         this->continuityOperatorPointer->setArtificialViscosity( artificialViscosity );
-         this->momentumXOperatorPointer->setArtificialViscosity( artificialViscosity );
-         this->momentumYOperatorPointer->setArtificialViscosity( artificialViscosity );
-         this->momentumZOperatorPointer->setArtificialViscosity( artificialViscosity );
-         this->energyOperatorPointer->setArtificialViscosity( artificialViscosity );
-         
-         return true;
-      }
-      
-      void setTau( const RealType& tau )
-      {
-         this->continuityOperatorPointer->setTau( tau );
-         this->momentumXOperatorPointer->setTau( tau );
-         this->momentumYOperatorPointer->setTau( tau );
-         this->momentumZOperatorPointer->setTau( tau );
-         this->energyOperatorPointer->setTau( tau );
-      }
-      
-      void setPressure( const MeshFunctionPointer& pressure )
-      {
-         this->momentumXOperatorPointer->setPressure( pressure );
-         this->momentumYOperatorPointer->setPressure( pressure );
-         this->momentumZOperatorPointer->setPressure( pressure );
-         this->energyOperatorPointer->setPressure( pressure );
-      }
-      
-      void setVelocity( const VectorFieldPointer& velocity )
-      {
-         this->continuityOperatorPointer->setVelocity( velocity );
-         this->momentumXOperatorPointer->setVelocity( velocity );
-         this->momentumYOperatorPointer->setVelocity( velocity );
-         this->momentumZOperatorPointer->setVelocity( velocity );
-         this->energyOperatorPointer->setVelocity( velocity );
-      }
-      
-      const ContinuityOperatorPointer& getContinuityOperator() const
-      {
-         return this->continuityOperatorPointer;
-      }
-      
-      const MomentumXOperatorPointer& getMomentumXOperator() const
-      {
-         return this->momentumXOperatorPointer;
-      }
-
-      const MomentumYOperatorPointer& getMomentumYOperator() const
-      {
-         return this->momentumYOperatorPointer;
-      }
-      
-      const MomentumZOperatorPointer& getMomentumZOperator() const
-      {
-         return this->momentumZOperatorPointer;
-      }
-      
-      const EnergyOperatorPointer& getEnergyOperator() const
-      {
-         return this->energyOperatorPointer;
-      }
-
-   protected:
-      
-      ContinuityOperatorPointer continuityOperatorPointer;
-      MomentumXOperatorPointer momentumXOperatorPointer;
-      MomentumYOperatorPointer momentumYOperatorPointer;
-      MomentumZOperatorPointer momentumZOperatorPointer;
-      EnergyOperatorPointer energyOperatorPointer;  
-      
-      RealType artificialViscosity;
-};
-
-} //namespace TNL
diff --git a/examples/inviscid-flow/LaxFridrichsContinuity.h b/examples/inviscid-flow/LaxFridrichsContinuity.h
deleted file mode 100644
index 45ad4d52b12d402365a40cac043d5525e230cecb..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow/LaxFridrichsContinuity.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/***************************************************************************
-                          LaxFridrichsContinuity.h  -  description
-                             -------------------
-    begin                : Feb 17, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-
-#pragma once
-
-#include <TNL/Containers/Vector.h>
-#include <TNL/Meshes/Grid.h>
-#include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
-
-namespace TNL {
-
-   
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class LaxFridrichsContinuityBase
-{
-   public:
-      
-      typedef Real RealType;
-      typedef Index IndexType;
-      typedef Mesh MeshType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-
-      LaxFridrichsContinuityBase()
-       : artificialViscosity( 1.0 ){};
-      
-      static String getType()
-      {
-         return String( "LaxFridrichsContinuity< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
-      void setTau(const Real& tau)
-      {
-          this->tau = tau;
-      };
-      
-      void setVelocity( const VelocityFieldPointer& velocity )
-      {
-          this->velocity = velocity;
-      };
-      
-      void setArtificialViscosity( const RealType& artificialViscosity )
-      {
-         this->artificialViscosity = artificialViscosity;
-      }
-
-
-      protected:
-         
-         RealType tau;
-         
-         VelocityFieldPointer velocity;
-         
-         RealType artificialViscosity;
-};
-
-   
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class LaxFridrichsContinuity
-{
-};
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class LaxFridrichsContinuity< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
-   : public LaxFridrichsContinuityBase< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef LaxFridrichsContinuityBase< MeshType, Real, Index > BaseType;
-      
-      using typename BaseType::RealType;
-      using typename BaseType::IndexType;
-      using typename BaseType::DeviceType;
-      using typename BaseType::CoordinatesType;
-      using typename BaseType::MeshFunctionType;
-      using typename BaseType::VelocityFieldType;
-      using typename BaseType::VelocityFieldPointer;
-      using BaseType::Dimensions;
-
-      template< typename MeshFunction, typename MeshEntity >
-      __cuda_callable__
-      Real operator()( const MeshFunction& u,
-                       const MeshEntity& entity,
-                       const RealType& time = 0.0 ) const
-      {
-         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
-         static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
-
-         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
-         const IndexType& center = entity.getIndex(); 
-         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
-         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
-         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
-         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
-         return 1.0 / ( 2.0 * this->tau ) * this->artificialViscosity * ( u[ west ] - 2.0 * u[ center ]  + u[ east ] ) 
-               - 0.5 * ( u[ east ] * velocity_x_east - u[ west ] * velocity_x_west ) * hxInverse;
-      }
-
-      /*template< typename MeshEntity >
-      __cuda_callable__
-      Index getLinearSystemRowLength( const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity ) const;
-
-      template< typename MeshEntity, typename Vector, typename MatrixRow >
-      __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunctionType& u,
-                               Vector& b,
-                               MatrixRow& matrixRow ) const;*/
-};
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class LaxFridrichsContinuity< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
-   : public LaxFridrichsContinuityBase< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef LaxFridrichsContinuityBase< MeshType, Real, Index > BaseType;
-      
-      using typename BaseType::RealType;
-      using typename BaseType::IndexType;
-      using typename BaseType::DeviceType;
-      using typename BaseType::CoordinatesType;
-      using typename BaseType::MeshFunctionType;
-      using typename BaseType::VelocityFieldType;
-      using typename BaseType::VelocityFieldPointer;
-      using BaseType::Dimensions;      
-
-      template< typename MeshFunction, typename MeshEntity >
-      __cuda_callable__
-      Real operator()( const MeshFunction& u,
-                       const MeshEntity& entity,
-                       const RealType& time = 0.0 ) const
-      {
-         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
-         static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
-
-         //rho
-         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
-         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
-         const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
-         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
-         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
-         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
-         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];
-         
-         return 1.0 / ( 4.0 * this->tau ) * this->artificialViscosity * ( u[ west ] + u[ east ] + u[ south ] + u[ north ] - 4.0 * u[ center ] ) 
-                       - 0.5 * ( ( u[ east ] * velocity_x_east - u[ west ] * velocity_x_west ) * hxInverse
-                               + ( u[ north ] * velocity_y_north - u[ south ] * velocity_y_south ) * hyInverse );
-      }
-
-      /*template< typename MeshEntity >
-      __cuda_callable__
-      Index getLinearSystemRowLength( const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity ) const;
-
-      template< typename MeshEntity, typename Vector, typename MatrixRow >
-      __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunctionType& u,
-                               Vector& b,
-                               MatrixRow& matrixRow ) const;*/
-};
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class LaxFridrichsContinuity< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
-   : public LaxFridrichsContinuityBase< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef LaxFridrichsContinuityBase< MeshType, Real, Index > BaseType;
-      
-      using typename BaseType::RealType;
-      using typename BaseType::IndexType;
-      using typename BaseType::DeviceType;
-      using typename BaseType::CoordinatesType;
-      using typename BaseType::MeshFunctionType;
-      using typename BaseType::VelocityFieldType;
-      using typename BaseType::VelocityFieldPointer;
-      using BaseType::Dimensions;
-
-      template< typename MeshFunction, typename MeshEntity >
-      __cuda_callable__
-      Real operator()( const MeshFunction& u,
-                       const MeshEntity& entity,
-                       const RealType& time = 0.0 ) const
-      {
-         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
-         static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
-
-         //rho
-         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1,  0,  0 >(); 
-         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >(); 
-         const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >(); 
-         const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
-         
-         const RealType& velocity_x_west  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
-         const RealType& velocity_x_east  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
-         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
-         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];
-         const RealType& velocity_z_up    = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ up ];
-         const RealType& velocity_z_down  = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ down ];
-         
-         return 1.0 / ( 6.0 * this->tau ) * this->artificialViscosity *
-                ( u[ west ] + u[ east ] + u[ south ] + u[ north ] + u[ up ] + u[ down ]- 6.0 * u[ center ] ) 
-                - 0.5 * ( ( u[ east ] * velocity_x_east - u[ west ] * velocity_x_west ) * hxInverse
-                        + ( u[ north ] * velocity_y_north - u[ south ] * velocity_y_south ) * hyInverse
-                        + ( u[ up ] * velocity_z_up - u[ down ] * velocity_z_down ) * hzInverse );
-         
-      }
-
-      /*template< typename MeshEntity >
-      __cuda_callable__
-      Index getLinearSystemRowLength( const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity ) const;
-
-      template< typename MeshEntity, typename Vector, typename MatrixRow >
-      __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunctionType& u,
-                               Vector& b,
-                               MatrixRow& matrixRow ) const;*/
-};
-
-
-} //namespace TNL
diff --git a/examples/inviscid-flow/LaxFridrichsEnergy.h b/examples/inviscid-flow/LaxFridrichsEnergy.h
deleted file mode 100644
index 18c824762b8c677253dbd4e494be7ad3aea7e769..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow/LaxFridrichsEnergy.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/***************************************************************************
-                          LaxFridrichsEnergy.h  -  description
-                             -------------------
-    begin                : Feb 17, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Containers/Vector.h>
-#include <TNL/Meshes/Grid.h>
-
-namespace TNL {
-   
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class LaxFridrichsEnergyBase
-{
-   public:
-      
-      typedef Real RealType;
-      typedef Index IndexType;
-      typedef Mesh MeshType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      
-      LaxFridrichsEnergyBase()
-       : artificialViscosity( 1.0 ){};
-
-      static String getType()
-      {
-         return String( "LaxFridrichsEnergy< " ) +
-             MeshType::getType() + ", " +
-             TNL::getType< Real >() + ", " +
-             TNL::getType< Index >() + " >"; 
-      }
-
-      void setTau(const Real& tau)
-      {
-          this->tau = tau;
-      };
-      
-      void setVelocity( const VelocityFieldPointer& velocity )
-      {
-          this->velocity = velocity;
-      };
-      
-      void setPressure( const MeshFunctionPointer& pressure )
-      {
-          this->pressure = pressure;
-      };
-      
-      void setArtificialViscosity( const RealType& artificialViscosity )
-      {
-         this->artificialViscosity = artificialViscosity;
-      }      
-
-      protected:
-         
-         RealType tau;
-         
-         VelocityFieldPointer velocity;
-         
-         MeshFunctionPointer pressure;
-         
-         RealType artificialViscosity;
-};
-   
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class LaxFridrichsEnergy
-{
-};
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class LaxFridrichsEnergy< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
-   : public LaxFridrichsEnergyBase< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-
-      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef LaxFridrichsEnergyBase< MeshType, Real, Index > BaseType;
-      
-      using typename BaseType::RealType;
-      using typename BaseType::IndexType;
-      using typename BaseType::DeviceType;
-      using typename BaseType::CoordinatesType;
-      using typename BaseType::MeshFunctionType;
-      using typename BaseType::MeshFunctionPointer;
-      using typename BaseType::VelocityFieldType;
-      using typename BaseType::VelocityFieldPointer;
-      using BaseType::Dimensions;      
-      
-      template< typename MeshFunction, typename MeshEntity >
-      __cuda_callable__
-      Real operator()( const MeshFunction& e,
-                       const MeshEntity& entity,
-                       const RealType& time = 0.0 ) const
-      {
-         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
-         static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
-
-         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
-         const IndexType& center = entity.getIndex(); 
-         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
-         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
-         const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
-         const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
-         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
-         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
-         return 1.0 / ( 2.0 * this->tau ) * this->artificialViscosity * ( e[ west ] - 2.0 * e[ center ]  + e[ east ] ) 
-                - 0.5 * ( ( e[ east ] + pressure_east ) * velocity_x_east  
-                        - ( e[ west ] + pressure_west ) * velocity_x_west ) * hxInverse;
-  
-      }
-
-      /*template< typename MeshEntity >
-      __cuda_callable__
-      Index getLinearSystemRowLength( const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity ) const;
-
-      template< typename MeshEntity, typename Vector, typename MatrixRow >
-      __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunctionType& u,
-                               Vector& b,
-                               MatrixRow& matrixRow ) const;*/
-};
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class LaxFridrichsEnergy< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
-   : public LaxFridrichsEnergyBase< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef LaxFridrichsEnergyBase< MeshType, Real, Index > BaseType;
-      
-      using typename BaseType::RealType;
-      using typename BaseType::IndexType;
-      using typename BaseType::DeviceType;
-      using typename BaseType::CoordinatesType;
-      using typename BaseType::MeshFunctionType;
-      using typename BaseType::MeshFunctionPointer;
-      using typename BaseType::VelocityFieldType;
-      using typename BaseType::VelocityFieldPointer;
-      using BaseType::Dimensions;
-      
-
-      template< typename MeshFunction, typename MeshEntity >
-      __cuda_callable__
-      Real operator()( const MeshFunction& e,
-                       const MeshEntity& entity,
-                       const RealType& time = 0.0 ) const
-      {
-         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
-         static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
- 
-         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
-         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
-         const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
-         const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
-         const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
-         const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
-         const RealType& pressure_south = this->pressure.template getData< DeviceType >()[ south ];
-         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
-         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
-         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
-         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];         
-         
-         return 1.0 / ( 4.0 * this->tau ) * this->artificialViscosity * ( e[ west ] + e[ east ] + e[ south ] + e[ north ] - 4.0 * e[ center ] ) 
-                - 0.5 * ( ( ( ( e[ east ] + pressure_east ) * velocity_x_east )
-                          -( ( e[ west ] + pressure_west ) * velocity_x_west ) ) * hxInverse
-                        + ( ( ( e[ north ] + pressure_north ) * velocity_y_north )
-                          -( ( e[ south ] + pressure_south ) * velocity_y_south ) ) * hyInverse );
-      }
-
-      /*template< typename MeshEntity >
-      __cuda_callable__
-      Index getLinearSystemRowLength( const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity ) const;
-
-      template< typename MeshEntity, typename Vector, typename MatrixRow >
-      __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunctionType& u,
-                               Vector& b,
-                               MatrixRow& matrixRow ) const;*/
-};
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class LaxFridrichsEnergy< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
-   : public LaxFridrichsEnergyBase< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef LaxFridrichsEnergyBase< MeshType, Real, Index > BaseType;
-      
-      using typename BaseType::RealType;
-      using typename BaseType::IndexType;
-      using typename BaseType::DeviceType;
-      using typename BaseType::CoordinatesType;
-      using typename BaseType::MeshFunctionType;
-      using typename BaseType::MeshFunctionPointer;
-      using typename BaseType::VelocityFieldType;
-      using typename BaseType::VelocityFieldPointer;
-      using BaseType::Dimensions;      
-
-      template< typename MeshFunction, typename MeshEntity >
-      __cuda_callable__
-      Real operator()( const MeshFunction& e,
-                       const MeshEntity& entity,
-                       const RealType& time = 0.0 ) const
-      {
-         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
-         static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
- 
-         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0,  0 >(); 
-         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1,  0 >(); 
-         const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts< 0,  0, -1 >(); 
-         const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
-         
-         const RealType& pressure_west  = this->pressure.template getData< DeviceType >()[ west ];
-         const RealType& pressure_east  = this->pressure.template getData< DeviceType >()[ east ];
-         const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
-         const RealType& pressure_south = this->pressure.template getData< DeviceType >()[ south ];
-         const RealType& pressure_up    = this->pressure.template getData< DeviceType >()[ up ];
-         const RealType& pressure_down  = this->pressure.template getData< DeviceType >()[ down ];
-         
-         const RealType& velocity_x_east  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
-         const RealType& velocity_x_west  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
-         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
-         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];
-         const RealType& velocity_z_up    = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ up ];
-         const RealType& velocity_z_down  = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ down ];         
-         
-         return 1.0 / ( 6.0 * this->tau ) * this->artificialViscosity *
-                 ( e[ west ] + e[ east ] + e[ south ] + e[ north ] + e[ up ] + e[ down ] - 6.0 * e[ center ] ) 
-                - 0.5 * ( ( ( ( e[ east ] + pressure_east ) * velocity_x_east )
-                           -( ( e[ west ] + pressure_west ) * velocity_x_west ) ) * hxInverse
-                        + ( ( ( e[ north ] + pressure_north ) * velocity_y_north )
-                           -( ( e[ south ] + pressure_south ) * velocity_y_south ) ) * hyInverse
-                        + ( ( ( e[ up ] + pressure_up ) * velocity_z_up )
-                           -( ( e[ down ] + pressure_down ) * velocity_z_down ) ) * hzInverse );
-      }
-
-      /*template< typename MeshEntity >
-      __cuda_callable__
-      Index getLinearSystemRowLength( const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity ) const;
-
-      template< typename MeshEntity, typename Vector, typename MatrixRow >
-      __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunctionType& u,
-                               Vector& b,
-                               MatrixRow& matrixRow ) const;*/
-};
-
-} //namespace TNL
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumBase.h b/examples/inviscid-flow/LaxFridrichsMomentumBase.h
deleted file mode 100644
index 67dae9fdf8256cecf032a731dd5d616d715ca0fe..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow/LaxFridrichsMomentumBase.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/***************************************************************************
-                          LaxFridrichsMomentumBase.h  -  description
-                             -------------------
-    begin                : Feb 17, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-
-#pragma once
-
-namespace TNL {
-
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class LaxFridrichsMomentumBase
-{
-   public:
-      
-      typedef Real RealType;
-      typedef Index IndexType;
-      typedef Mesh MeshType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::CoordinatesType CoordinatesType;
-      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      
-      LaxFridrichsMomentumBase()
-       : artificialViscosity( 1.0 ){};
-
-      void setTau(const Real& tau)
-      {
-          this->tau = tau;
-      };
-      
-      void setVelocity( const VelocityFieldPointer& velocity )
-      {
-          this->velocity = velocity;
-      };
-      
-      void setPressure( const MeshFunctionPointer& pressure )
-      {
-          this->pressure = pressure;
-      };
-
-      void setArtificialViscosity( const RealType& artificialViscosity )
-      {
-         this->artificialViscosity = artificialViscosity;
-      }
-
-      protected:
-         
-         RealType tau;
-         
-         VelocityFieldPointer velocity;
-         
-         MeshFunctionPointer pressure;
-         
-         RealType artificialViscosity;
-};
-
-} //namespace TNL
diff --git a/examples/inviscid-flow/PhysicalVariablesGetter.h b/examples/inviscid-flow/PhysicalVariablesGetter.h
deleted file mode 100644
index f1ba6bd1222b8653faeaac041606c101a071e188..0000000000000000000000000000000000000000
--- a/examples/inviscid-flow/PhysicalVariablesGetter.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/***************************************************************************
-                          CompressibleConservativeVariables.h  -  description
-                             -------------------
-    begin                : Feb 12, 2017
-    copyright            : (C) 2017 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/SharedPointer.h>
-#include <TNL/Functions/MeshFunction.h>
-#include <TNL/Functions/VectorField.h>
-#include <TNL/Functions/MeshFunctionEvaluator.h>
-#include "CompressibleConservativeVariables.h"
-
-namespace TNL {
-   
-template< typename Mesh >
-class PhysicalVariablesGetter
-{
-   public:
-      
-      typedef Mesh MeshType;
-      typedef typename MeshType::RealType RealType;
-      typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::IndexType IndexType;
-      static const int Dimensions = MeshType::getMeshDimension();
-      
-      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      
-      class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
-      {
-         public:
-            typedef typename MeshType::RealType RealType;
-            
-            VelocityGetter( MeshFunctionPointer density, 
-                            MeshFunctionPointer momentum )
-            : density( density ), momentum( momentum ) {}
-            
-            template< typename EntityType >
-            __cuda_callable__
-            RealType operator()( const EntityType& meshEntity,
-                                        const RealType& time = 0.0 ) const
-            {
-               if( density.template getData< DeviceType >()( meshEntity ) == 0.0 )
-                  return 0;
-               else
-                  return momentum.template getData< DeviceType >()( meshEntity ) / 
-                         density.template getData< DeviceType >()( meshEntity );
-            }
-            
-         protected:
-            const MeshFunctionPointer density, momentum;
-      };
-      
-      class PressureGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
-      {
-         public:
-            typedef typename MeshType::RealType RealType;
-            
-            PressureGetter( MeshFunctionPointer density,
-                            MeshFunctionPointer energy, 
-                            VelocityFieldPointer momentum,
-                            const RealType& gamma )
-            : density( density ), energy( energy ), momentum( momentum ), gamma( gamma ) {}
-            
-            template< typename EntityType >
-            __cuda_callable__
-            RealType operator()( const EntityType& meshEntity,
-                                 const RealType& time = 0.0 ) const
-            {
-               const RealType e = energy.template getData< DeviceType >()( meshEntity );
-               const RealType rho = density.template getData< DeviceType >()( meshEntity );
-               const RealType momentumNorm = momentum.template getData< DeviceType >().getVector( meshEntity ).lpNorm( 2.0 );
-               if( rho == 0.0 )
-                  return 0;
-               else
-                  return ( gamma - 1.0 ) * ( e - 0.5 * momentumNorm * momentumNorm / rho );
-            }
-            
-         protected:
-            const MeshFunctionPointer density, energy;
-            const VelocityFieldPointer momentum;
-            const RealType gamma;
-      };      
-
-      
-      void getVelocity( const ConservativeVariablesPointer& conservativeVariables,
-                        VelocityFieldPointer& velocity )
-      {
-         Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
-         for( int i = 0; i < Dimensions; i++ )
-         {
-            SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
-                                                                        ( *conservativeVariables->getMomentum() )[ i ] );
-            evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
-         }
-      }
-      
-      void getPressure( const ConservativeVariablesPointer& conservativeVariables,
-                        const RealType& gamma,
-                        MeshFunctionPointer& pressure )
-      {
-         Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
-         SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
-                                                                     conservativeVariables->getEnergy(),
-                                                                     conservativeVariables->getMomentum(),
-                                                                     gamma );
-         evaluator.evaluate( pressure, pressureGetter );
-      }
-      
-};
-   
-} //namespace TNL
diff --git a/share/CMakeLists.txt b/share/CMakeLists.txt
index 3559626d0dd91df5cdf0affcc0cea967bd5a2e21..2a6f286925ef2a17b6cd7adc902fdefbc930fb39 100644
--- a/share/CMakeLists.txt
+++ b/share/CMakeLists.txt
@@ -1 +1,2 @@
-add_subdirectory( Tools )
+add_subdirectory (cmake)
+add_subdirectory (pkgconfig)
diff --git a/share/Tools/CMakeLists.txt b/share/Tools/CMakeLists.txt
deleted file mode 100644
index 2a6f286925ef2a17b6cd7adc902fdefbc930fb39..0000000000000000000000000000000000000000
--- a/share/Tools/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-add_subdirectory (cmake)
-add_subdirectory (pkgconfig)
diff --git a/share/Tools/cmake/CMakeFindTNL.cmake b/share/cmake/CMakeFindTNL.cmake
similarity index 100%
rename from share/Tools/cmake/CMakeFindTNL.cmake
rename to share/cmake/CMakeFindTNL.cmake
diff --git a/share/Tools/cmake/CMakeLists.txt b/share/cmake/CMakeLists.txt
similarity index 100%
rename from share/Tools/cmake/CMakeLists.txt
rename to share/cmake/CMakeLists.txt
diff --git a/share/Tools/pkgconfig/CMakeLists.txt b/share/pkgconfig/CMakeLists.txt
similarity index 100%
rename from share/Tools/pkgconfig/CMakeLists.txt
rename to share/pkgconfig/CMakeLists.txt
diff --git a/share/Tools/pkgconfig/cuda.pc.in b/share/pkgconfig/cuda.pc.in
similarity index 100%
rename from share/Tools/pkgconfig/cuda.pc.in
rename to share/pkgconfig/cuda.pc.in
diff --git a/share/Tools/pkgconfig/tnl-cuda.pc.in b/share/pkgconfig/tnl-cuda.pc.in
similarity index 100%
rename from share/Tools/pkgconfig/tnl-cuda.pc.in
rename to share/pkgconfig/tnl-cuda.pc.in
diff --git a/share/Tools/pkgconfig/tnl-openmp.pc.in b/share/pkgconfig/tnl-openmp.pc.in
similarity index 100%
rename from share/Tools/pkgconfig/tnl-openmp.pc.in
rename to share/pkgconfig/tnl-openmp.pc.in
diff --git a/share/Tools/pkgconfig/tnl.pc.in b/share/pkgconfig/tnl.pc.in
similarity index 100%
rename from share/Tools/pkgconfig/tnl.pc.in
rename to share/pkgconfig/tnl.pc.in
diff --git a/src/Benchmarks/BLAS/CMakeLists.txt b/src/Benchmarks/BLAS/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc396c46e60512b5cab5f5f7455756f609b76c47
--- /dev/null
+++ b/src/Benchmarks/BLAS/CMakeLists.txt
@@ -0,0 +1,10 @@
+# BLAS benchmark: the CUDA build compiles the .cu source and adds cuBLAS, otherwise the .cpp source is used.
+if( BUILD_CUDA )
+    CUDA_ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cu )
+    CUDA_ADD_CUBLAS_TO_TARGET( tnl-benchmark-blas )
+else()
+    ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cpp )
+endif()
+TARGET_LINK_LIBRARIES( tnl-benchmark-blas tnl )
+
+install( TARGETS tnl-benchmark-blas RUNTIME DESTINATION bin )
diff --git a/src/Benchmarks/BLAS/array-operations.h b/src/Benchmarks/BLAS/array-operations.h
new file mode 100644
index 0000000000000000000000000000000000000000..aacdb9cc65315af377ddec49c167a9a197483bd5
--- /dev/null
+++ b/src/Benchmarks/BLAS/array-operations.h
@@ -0,0 +1,165 @@
+/***************************************************************************
+                          array-operations.h  -  description
+                             -------------------
+    begin                : Dec 30, 2015
+    copyright            : (C) 2015 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include "../Benchmarks.h"
+
+#include <TNL/Containers/Array.h>
+
+namespace TNL {
+namespace Benchmarks {
+
+template< typename Real = double,
+          typename Index = int >
+bool
+benchmarkArrayOperations( Benchmark & benchmark,
+                          const int & loops,
+                          const long & size )
+{
+   typedef Containers::Array< Real, Devices::Host, Index > HostArray;
+   typedef Containers::Array< Real, Devices::Cuda, Index > CudaArray;
+   using namespace std;
+
+   double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
+
+   HostArray hostArray, hostArray2;
+   CudaArray deviceArray, deviceArray2;
+   hostArray.setSize( size );
+   hostArray2.setSize( size );
+#ifdef HAVE_CUDA
+   deviceArray.setSize( size );
+   deviceArray2.setSize( size );
+#endif
+
+   Real resultHost, resultDevice;
+
+
+   // reset functions
+   auto reset1 = [&]() {
+      hostArray.setValue( 1.0 );
+#ifdef HAVE_CUDA
+      deviceArray.setValue( 1.0 );
+#endif
+   };
+   auto reset2 = [&]() {
+      hostArray2.setValue( 1.0 );
+#ifdef HAVE_CUDA
+      deviceArray2.setValue( 1.0 );
+#endif
+   };
+   auto reset12 = [&]() {
+      reset1();
+      reset2();
+   };
+
+
+   reset12();
+
+
+   auto compareHost = [&]() {
+      resultHost = (int) ( hostArray == hostArray2 );  // parenthesized: a cast binds tighter than ==
+   };
+   auto compareCuda = [&]() {
+      resultDevice = (int) ( deviceArray == deviceArray2 );  // parenthesized: a cast binds tighter than ==
+   };
+   benchmark.setOperation( "comparison (operator==)", 2 * datasetSize );
+   benchmark.time( reset1, "CPU", compareHost );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", compareCuda );
+#endif
+
+
+   auto copyAssignHostHost = [&]() {
+      hostArray = hostArray2;
+   };
+   auto copyAssignCudaCuda = [&]() {
+      deviceArray = deviceArray2;
+   };
+   benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
+   // copyBasetime is only used inside the HAVE_CUDA guard below, so the compiler
+   // may warn about an unused variable when compiling without CUDA
+   const double copyBasetime = benchmark.time( reset1, "CPU", copyAssignHostHost );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", copyAssignCudaCuda );
+#endif
+
+
+   auto copyAssignHostCuda = [&]() {
+      deviceArray = hostArray;
+   };
+   auto copyAssignCudaHost = [&]() {
+      hostArray = deviceArray;
+   };
+#ifdef HAVE_CUDA
+   benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime );
+   benchmark.time( reset1,
+                   "CPU->GPU", copyAssignHostCuda,
+                   "GPU->CPU", copyAssignCudaHost );
+#endif
+
+
+   auto setValueHost = [&]() {
+      hostArray.setValue( 3.0 );
+   };
+   auto setValueCuda = [&]() {
+      deviceArray.setValue( 3.0 );
+   };
+   benchmark.setOperation( "setValue", datasetSize );
+   benchmark.time( reset1, "CPU", setValueHost );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", setValueCuda );
+#endif
+
+
+   auto setSizeHost = [&]() {
+      hostArray.setSize( size );
+   };
+   auto setSizeCuda = [&]() {
+      deviceArray.setSize( size );
+   };
+   auto resetSize1 = [&]() {
+      hostArray.reset();
+#ifdef HAVE_CUDA
+      deviceArray.reset();
+#endif
+   };
+   benchmark.setOperation( "allocation (setSize)", datasetSize );
+   benchmark.time( resetSize1, "CPU", setSizeHost );
+#ifdef HAVE_CUDA
+   benchmark.time( resetSize1, "GPU", setSizeCuda );
+#endif
+
+
+   auto resetSizeHost = [&]() {
+      hostArray.reset();
+   };
+   auto resetSizeCuda = [&]() {
+      deviceArray.reset();
+   };
+   auto setSize1 = [&]() {
+      hostArray.setSize( size );
+#ifdef HAVE_CUDA
+      deviceArray.setSize( size );
+#endif
+   };
+   benchmark.setOperation( "deallocation (reset)", datasetSize );
+   benchmark.time( setSize1, "CPU", resetSizeHost );
+#ifdef HAVE_CUDA
+   benchmark.time( setSize1, "GPU", resetSizeCuda );
+#endif
+
+   return true;
+}
+
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/tests/benchmarks/cublasWrappers.h b/src/Benchmarks/BLAS/cublasWrappers.h
similarity index 74%
rename from tests/benchmarks/cublasWrappers.h
rename to src/Benchmarks/BLAS/cublasWrappers.h
index 6b71a4ed7befc12664440cf5425f5975e6feec0c..1e63e139d6faa513706ed1b18a207e87ea1a079d 100644
--- a/tests/benchmarks/cublasWrappers.h
+++ b/src/Benchmarks/BLAS/cublasWrappers.h
@@ -8,14 +8,14 @@ inline cublasStatus_t
 cublasIgamax( cublasHandle_t handle, int n,
               const float           *x, int incx, int *result )
 {
-    return cublasIsamax( handle, n, x, incx, result );
+   return cublasIsamax( handle, n, x, incx, result );
 }
 
 inline cublasStatus_t
 cublasIgamax( cublasHandle_t handle, int n,
               const double          *x, int incx, int *result )
 {
-    return cublasIdamax( handle, n, x, incx, result );
+   return cublasIdamax( handle, n, x, incx, result );
 }
 
 
@@ -23,14 +23,14 @@ inline cublasStatus_t
 cublasIgamin( cublasHandle_t handle, int n,
               const float           *x, int incx, int *result )
 {
-    return cublasIsamin( handle, n, x, incx, result );
+   return cublasIsamin( handle, n, x, incx, result );
 }
 
 inline cublasStatus_t
 cublasIgamin( cublasHandle_t handle, int n,
               const double          *x, int incx, int *result )
 {
-    return cublasIdamin( handle, n, x, incx, result );
+   return cublasIdamin( handle, n, x, incx, result );
 }
 
 
@@ -38,14 +38,14 @@ inline cublasStatus_t
 cublasGasum( cublasHandle_t handle, int n,
              const float           *x, int incx, float  *result )
 {
-    return cublasSasum( handle, n, x, incx, result );
+   return cublasSasum( handle, n, x, incx, result );
 }
 
 inline cublasStatus_t
 cublasGasum( cublasHandle_t handle, int n,
              const double          *x, int incx, double *result )
 {
-    return cublasDasum( handle, n, x, incx, result );
+   return cublasDasum( handle, n, x, incx, result );
 }
 
 
@@ -55,7 +55,7 @@ cublasGaxpy( cublasHandle_t handle, int n,
              const float           *x, int incx,
              float                 *y, int incy )
 {
-    return cublasSaxpy( handle, n, alpha, x, incx, y, incy );
+   return cublasSaxpy( handle, n, alpha, x, incx, y, incy );
 }
 
 inline cublasStatus_t
@@ -64,7 +64,7 @@ cublasGaxpy( cublasHandle_t handle, int n,
              const double          *x, int incx,
              double                *y, int incy )
 {
-    return cublasDaxpy( handle, n, alpha, x, incx, y, incy );
+   return cublasDaxpy( handle, n, alpha, x, incx, y, incy );
 }
 
 
@@ -74,7 +74,7 @@ cublasGdot( cublasHandle_t handle, int n,
             const float        *y, int incy,
             float         *result )
 {
-    return cublasSdot( handle, n, x, incx, y, incy, result );
+   return cublasSdot( handle, n, x, incx, y, incy, result );
 }
 
 inline cublasStatus_t
@@ -83,7 +83,7 @@ cublasGdot( cublasHandle_t handle, int n,
             const double       *y, int incy,
             double        *result )
 {
-    return cublasDdot( handle, n, x, incx, y, incy, result );
+   return cublasDdot( handle, n, x, incx, y, incy, result );
 }
 
 
@@ -91,14 +91,14 @@ inline cublasStatus_t
 cublasGnrm2( cublasHandle_t handle, int n,
              const float           *x, int incx, float  *result )
 {
-    return cublasSnrm2( handle, n, x, incx, result );
+   return cublasSnrm2( handle, n, x, incx, result );
 }
 
 inline cublasStatus_t
 cublasGnrm2( cublasHandle_t handle, int n,
              const double          *x, int incx, double *result )
 {
-    return cublasDnrm2( handle, n, x, incx, result );
+   return cublasDnrm2( handle, n, x, incx, result );
 }
 
 
@@ -107,7 +107,7 @@ cublasGscal( cublasHandle_t handle, int n,
              const float           *alpha,
              float           *x, int incx )
 {
-    return cublasSscal( handle, n, alpha, x, incx );
+   return cublasSscal( handle, n, alpha, x, incx );
 }
 
 inline cublasStatus_t
@@ -115,7 +115,7 @@ cublasGscal( cublasHandle_t handle, int n,
              const double          *alpha,
              double          *x, int incx )
 {
-    return cublasDscal( handle, n, alpha, x, incx );
+   return cublasDscal( handle, n, alpha, x, incx );
 }
 
 #endif
diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
new file mode 100644
index 0000000000000000000000000000000000000000..a6840af9f8c1f1dedeee876fd2bca258a55c64ba
--- /dev/null
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -0,0 +1,189 @@
+/***************************************************************************
+                          spmv.h  -  description
+                             -------------------
+    begin                : Dec 30, 2015
+    copyright            : (C) 2015 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include "../Benchmarks.h"
+
+#include <TNL/Containers/List.h>
+#include <TNL/Pointers/DevicePointer.h>
+#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/ChunkedEllpack.h>
+
+namespace TNL {
+namespace Benchmarks {
+
+// Three-parameter alias of Matrices::SlicedEllpack, so that it fits the ( Real, Device, Index ) template-template parameter of benchmarkSpMV like the other formats.
+template< typename Real, typename Device, typename Index >
+using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
+
+// Fills up to elementsPerRow consecutive entries centred on the diagonal of every row with
+// the values 1, 2, ... (by position in the band) and returns the number of entries set.
+template< typename Matrix >
+int setHostTestMatrix( Matrix& matrix, const int elementsPerRow )
+{
+   const int rows = matrix.getRows(), columns = matrix.getColumns();   // columns are bounded by getColumns(), as in the CUDA kernel
+   int elements( 0 );
+   for( int row = 0; row < rows; row++ ) {
+      const int firstColumn = row - elementsPerRow / 2;
+      for( int element = 0; element < elementsPerRow; element++ ) {
+         const int column = firstColumn + element;
+         if( column >= 0 && column < columns ) {
+            matrix.setElement( row, column, element + 1 );
+            elements++;
+         }
+      }
+   }
+   return elements;
+}
+
+#ifdef HAVE_CUDA
+// Each thread fills one row: up to elementsPerRow consecutive entries centred on the diagonal, valued 1, 2, ... by position.
+template< typename Matrix >
+__global__ void setCudaTestMatrixKernel( Matrix* matrix, const int elementsPerRow, const int gridIdx )
+{
+   const auto firstBlock = gridIdx * Devices::Cuda::getMaxGridSize();   // block offset of this launch
+   const int rowIdx = ( firstBlock + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < matrix->getRows() ) {
+      const int firstColumn = rowIdx - elementsPerRow / 2;
+      for( int offset = 0; offset < elementsPerRow; offset++ ) {
+         const int column = firstColumn + offset;
+         if( column >= 0 && column < matrix->getColumns() )
+            matrix->setElementFast( rowIdx, column, offset + 1 );
+      }
+   }
+}
+#endif
+
+// Fills a device matrix by launching setCudaTestMatrixKernel over all rows; does nothing without HAVE_CUDA.
+template< typename Matrix >
+void setCudaTestMatrix( Matrix& matrix, const int elementsPerRow )
+{
+#ifdef HAVE_CUDA
+   typedef typename Matrix::IndexType IndexType;
+   // one thread per row, 256 threads per block, at most getMaxGridSize() blocks per launch
+   Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
+   dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
+   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
+   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
+      // the last launch gets the remaining blocks; unlike cudaBlocks % maxGridSize this is never 0
+      if( gridIdx == cudaGrids - 1 )
+         cudaGridSize.x = cudaBlocks - gridIdx * Devices::Cuda::getMaxGridSize();
+      setCudaTestMatrixKernel< Matrix ><<< cudaGridSize, cudaBlockSize >>>
+         ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
+      TNL_CHECK_CUDA_DEVICE;
+   }
+#endif
+}
+
+
+// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
+// Times the sparse matrix-vector product of one matrix format on a synthetic size x size band matrix:
+// always on the host, on the GPU only with HAVE_CUDA. The Vector parameter is not used in the body.
+template< typename Real,
+          template< typename, typename, typename > class Matrix,
+          template< typename, typename, typename > class Vector = Containers::Vector >
+bool
+benchmarkSpMV( Benchmark & benchmark,
+               const int & loops,
+               const int & size,
+               const int elementsPerRow = 5 )
+{
+   using HostMatrix = Matrix< Real, Devices::Host, int >;
+   using DeviceMatrix = Matrix< Real, Devices::Cuda, int >;
+   using HostVector = Containers::Vector< Real, Devices::Host, int >;
+   using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
+
+   HostMatrix hostMatrix;
+   DeviceMatrix deviceMatrix;
+   Containers::Vector< int, Devices::Host, int > hostRowLengths;
+   Containers::Vector< int, Devices::Cuda, int > deviceRowLengths;
+   HostVector hostVector, hostVector2;
+   CudaVector deviceVector, deviceVector2;
+
+   // name the result group after the first token of the parsed host matrix type
+   Containers::List< String > typeTokens;
+   parseObjectType( HostMatrix::getType(), typeTokens );
+   benchmark.createHorizontalGroup( typeTokens[ 0 ], 2 );
+
+   // host side: set the sizes, then request elementsPerRow entries in every row
+   hostRowLengths.setSize( size );
+   hostMatrix.setDimensions( size, size );
+   hostVector.setSize( size );
+   hostVector2.setSize( size );
+   hostRowLengths.setValue( elementsPerRow );
+   hostMatrix.setCompressedRowLengths( hostRowLengths );
+
+#ifdef HAVE_CUDA
+   // device side: the same setup, compiled only for CUDA builds
+   deviceRowLengths.setSize( size );
+   deviceMatrix.setDimensions( size, size );
+   deviceVector.setSize( size );
+   deviceVector2.setSize( size );
+   deviceRowLengths.setValue( elementsPerRow );
+   deviceMatrix.setCompressedRowLengths( deviceRowLengths );
+#endif
+
+   // fill both matrices; setCudaTestMatrix has an empty body without HAVE_CUDA
+   const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow );
+   setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow );
+
+   // counts two Real values and one int per stored element (presumably matrix value, vector
+   // entry and column index - TODO confirm), over all loops, in units of oneGB
+   const auto bytesPerElement = 2 * sizeof( Real ) + sizeof( int );
+   const double datasetSize = ( double ) loops * elements * bytesPerElement / oneGB;
+
+   // state reset handed to benchmark.time() together with each compute function
+   auto reset = [&]() {
+      hostVector.setValue( 1.0 );
+      hostVector2.setValue( 0.0 );
+#ifdef HAVE_CUDA
+      deviceVector.setValue( 1.0 );
+      deviceVector2.setValue( 0.0 );
+#endif
+   };
+
+   // the measured operations
+   auto spmvHost = [&]() { hostMatrix.vectorProduct( hostVector, hostVector2 ); };
+   auto spmvCuda = [&]() { deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); };
+
+   benchmark.setOperation( datasetSize );
+   benchmark.time( reset, "CPU", spmvHost );
+#ifdef HAVE_CUDA
+   benchmark.time( reset, "GPU", spmvCuda );
+#endif
+
+   return true;
+}
+
+// Runs benchmarkSpMV for the CSR, Ellpack, SlicedEllpack and ChunkedEllpack formats and
+// returns true only if all of them returned true. The Index parameter is not used in the body.
+template< typename Real = double, typename Index = int >
+bool benchmarkSpmvSynthetic( Benchmark & benchmark,
+                             const int & loops,
+                             const int & size,
+                             const int & elementsPerRow )
+{
+   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
+   // &= instead of &&: every format is still benchmarked after an earlier failure
+   bool result = benchmarkSpMV< Real, Matrices::CSR >( benchmark, loops, size, elementsPerRow );
+   result &= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, loops, size, elementsPerRow );
+   result &= benchmarkSpMV< Real, SlicedEllpack >( benchmark, loops, size, elementsPerRow );
+   result &= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, loops, size, elementsPerRow );
+   return result;
+}
+
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/tests/benchmarks/tnl-benchmark-blas.cpp b/src/Benchmarks/BLAS/tnl-benchmark-blas.cpp
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-blas.cpp
rename to src/Benchmarks/BLAS/tnl-benchmark-blas.cpp
diff --git a/tests/benchmarks/tnl-benchmark-blas.cu b/src/Benchmarks/BLAS/tnl-benchmark-blas.cu
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-blas.cu
rename to src/Benchmarks/BLAS/tnl-benchmark-blas.cu
diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
new file mode 100644
index 0000000000000000000000000000000000000000..73ea0b375a23440105ea6dfbae599c63fb108adb
--- /dev/null
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -0,0 +1,192 @@
+/***************************************************************************
+                          tnl-benchmark-blas.h  -  description
+                             -------------------
+    begin                : Jan 27, 2010
+    copyright            : (C) 2010 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/CudaDeviceInfo.h>
+#include <TNL/Devices/SystemInfo.h>
+#include <TNL/Config/ConfigDescription.h>
+#include <TNL/Config/ParameterContainer.h>
+
+#include "array-operations.h"
+#include "vector-operations.h"
+#include "spmv.h"
+
+using namespace TNL;
+using namespace TNL::Benchmarks;
+
+
+// TODO: should benchmarks check the result of the computation?
+
+
+// Runs the array, vector and SpMV benchmark suites for one precision (Real); every suite sweeps
+// the size from minSize up to maxSize, multiplying it by sizeStepFactor after each step.
+template< typename Real >
+void
+runBlasBenchmarks( Benchmark & benchmark,
+                   Benchmark::MetadataMap metadata,
+                   const std::size_t & minSize,
+                   const std::size_t & maxSize,
+                   const double & sizeStepFactor,
+                   const unsigned & loops,
+                   const unsigned & elementsPerRow )
+{
+   // a zero start size or a step factor <= 1 would keep the sweeps below from terminating
+   if( minSize == 0 || ! ( sizeStepFactor > 1 ) ) {
+      std::cerr << "Benchmarks skipped: the minimum size must be positive and the size step factor greater than 1." << std::endl;
+      return;
+   }
+
+   // metadata is a by-value copy, so the precision entry stays local to this call
+   const String precision = getType< Real >();
+   metadata["precision"] = precision;
+
+   // Array operations
+   benchmark.newBenchmark( String("Array operations (") + precision + ")", metadata );
+   for( std::size_t size = minSize; size <= maxSize; size *= sizeStepFactor ) {
+      benchmark.setMetadataColumns( Benchmark::MetadataColumns({ {"size", size} }) );
+      benchmarkArrayOperations< Real >( benchmark, loops, size );
+   }
+
+   // Vector operations
+   benchmark.newBenchmark( String("Vector operations (") + precision + ")", metadata );
+   for( std::size_t size = minSize; size <= maxSize; size *= sizeStepFactor ) {
+      benchmark.setMetadataColumns( Benchmark::MetadataColumns({ {"size", size} }) );
+      benchmarkVectorOperations< Real >( benchmark, loops, size );
+   }
+
+   // Sparse matrix-vector multiplication
+   benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", metadata );
+   for( std::size_t size = minSize; size <= maxSize; size *= sizeStepFactor ) {
+      benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+         {"rows", size}, {"columns", size}, {"elements per row", elementsPerRow},
+      } ));
+      benchmarkSpmvSynthetic< Real >( benchmark, loops, size, elementsPerRow );
+   }
+}
+
+// Declares the command-line options of the benchmark (with their defaults) and the device options.
+void setupConfig( Config::ConfigDescription & config )
+{
+   config.addDelimiter( "Benchmark settings:" );
+   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
+   config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
+   config.addEntryEnum( "append" );
+   config.addEntryEnum( "overwrite" );
+   config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
+   config.addEntryEnum( "float" );
+   config.addEntryEnum( "double" );
+   config.addEntryEnum( "all" );
+   config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark.", 100000 );
+   config.addEntry< int >( "max-size", "Maximum size of arrays/vectors used in the benchmark.", 10000000 );
+   config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
+   config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 );
+   config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 );
+   config.addEntry< int >( "verbose", "Verbose mode.", 1 );
+
+   config.addDelimiter( "Device settings:" );
+   Devices::Host::configSetup( config );
+   Devices::Cuda::configSetup( config );
+}
+
+// Entry point: parses the command line, runs the benchmarks for the selected precision(s) and
+// saves the results to the log file. Returns a failure status on invalid input or log file errors.
+int main( int argc, char* argv[] )
+{
+   Config::ParameterContainer parameters;
+   Config::ConfigDescription conf_desc;
+   setupConfig( conf_desc );
+
+   if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) {
+      conf_desc.printUsage( argv[ 0 ] );
+      return EXIT_FAILURE;
+   }
+
+   Devices::Host::setup( parameters );
+   Devices::Cuda::setup( parameters );
+
+   const String & logFileName = parameters.getParameter< String >( "log-file" );
+   const String & outputMode = parameters.getParameter< String >( "output-mode" );
+   const String & precision = parameters.getParameter< String >( "precision" );
+   // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
+   // so all numeric options are read back as int (64-bit values cannot be passed) and are
+   // range-checked before the conversion to the unsigned types used below.
+   const int minSizeArg = parameters.getParameter< int >( "min-size" );
+   const int maxSizeArg = parameters.getParameter< int >( "max-size" );
+   const int stepArg = parameters.getParameter< int >( "size-step-factor" );
+   const int loopsArg = parameters.getParameter< int >( "loops" );
+   const int elementsArg = parameters.getParameter< int >( "elements-per-row" );
+   const int verboseArg = parameters.getParameter< int >( "verbose" );
+   if( minSizeArg < 1 || maxSizeArg < 0 || stepArg < 2 || loopsArg < 0 || elementsArg < 0 || verboseArg < 0 ) {
+      std::cerr << "Invalid arguments: --min-size must be positive, --size-step-factor must be greater than 1, and --max-size, --loops, --elements-per-row and --verbose must not be negative." << std::endl;
+      return EXIT_FAILURE;
+   }
+   const std::size_t minSize = minSizeArg, maxSize = maxSizeArg;
+   const unsigned sizeStepFactor = stepArg, loops = loopsArg, elementsPerRow = elementsArg, verbose = verboseArg;
+
+   // open log file
+   auto mode = std::ios::out;
+   if( outputMode == "append" )
+      mode |= std::ios::app;
+   std::ofstream logFile( logFileName.getString(), mode );
+   if( ! logFile ) {
+      std::cerr << "Failed to open the log file '" << logFileName << "'." << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // init benchmark and common metadata
+   Benchmark benchmark( loops, verbose );
+   // prepare global metadata
+   const int cpu_id = 0;
+   Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
+   String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", "
+                       + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 );
+#ifdef HAVE_CUDA
+   const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice();
+   const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
+                             String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) );
+#endif
+   Benchmark::MetadataMap metadata {
+      { "host name", Devices::SystemInfo::getHostname() },
+      { "architecture", Devices::SystemInfo::getArchitecture() },
+      { "system", Devices::SystemInfo::getSystemName() },
+      { "system release", Devices::SystemInfo::getSystemRelease() },
+      { "start time", Devices::SystemInfo::getCurrentTime() },
+      { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) },
+      { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) },
+      { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) },
+      { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 },
+      { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
+#ifdef HAVE_CUDA
+      { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) },
+      { "GPU architecture", deviceArch },
+      { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) },
+      { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 },
+      { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 },
+      { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 },
+      { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) },
+#endif
+   };
+
+   if( precision == "all" || precision == "float" )
+      runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow );
+   if( precision == "all" || precision == "double" )
+      runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow );
+
+   if( ! benchmark.save( logFile ) ) {
+      std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   return EXIT_SUCCESS;
+}
diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h
new file mode 100644
index 0000000000000000000000000000000000000000..e65f8980b1066e042206e328d15b50e32c81432f
--- /dev/null
+++ b/src/Benchmarks/BLAS/vector-operations.h
@@ -0,0 +1,440 @@
+/***************************************************************************
+                          vector-operations.h  -  description
+                             -------------------
+    begin                : Dec 30, 2015
+    copyright            : (C) 2015 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <stdlib.h> // srand48
+
+#include "../Benchmarks.h"
+
+#include <TNL/Containers/Vector.h>
+
+#ifdef HAVE_CUDA
+#include "cublasWrappers.h"
+#endif
+
+namespace TNL {
+namespace Benchmarks {
+
+// Times the reductions and element-wise updates below on host vectors and, when HAVE_CUDA is
+// defined, on CUDA vectors and through cuBLAS. Always returns true.
+template< typename Real = double, typename Index = int >
+bool benchmarkVectorOperations( Benchmark & benchmark,
+                                const int & loops,
+                                const long & size )
+{
+   typedef Containers::Vector< Real, Devices::Host, Index > HostVector;
+   typedef Containers::Vector< Real, Devices::Cuda, Index > CudaVector;
+   using namespace std;
+
+   double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
+
+   HostVector hostVector, hostVector2;
+   CudaVector deviceVector, deviceVector2;
+   hostVector.setSize( size );
+   hostVector2.setSize( size );
+#ifdef HAVE_CUDA
+   deviceVector.setSize( size );
+   deviceVector2.setSize( size );
+#endif
+
+   Real resultHost = 0, resultDevice = 0;   // initialized: reset1 reads resultHost before any benchmark has written it
+
+#ifdef HAVE_CUDA
+   cublasHandle_t cublasHandle;
+   cublasCreate( &cublasHandle );
+#endif
+
+
+   // reset functions
+   // (Make sure to always use some in benchmarks, even if it's not necessary
+   // to assure correct result - it helps to clear cache and avoid optimizations
+   // of the benchmark loop.)
+   auto reset1 = [&]() {
+      hostVector.setValue( 1.0 );
+#ifdef HAVE_CUDA
+      deviceVector.setValue( 1.0 );
+#endif
+      // A relatively harmless call to keep the compiler from realizing we
+      // don't actually do any useful work with the result of the reduction.
+      srand48(resultHost);
+      resultHost = resultDevice = 0.0;
+   };
+   auto reset2 = [&]() {
+      hostVector2.setValue( 1.0 );
+#ifdef HAVE_CUDA
+      deviceVector2.setValue( 1.0 );
+#endif
+   };
+   auto reset12 = [&]() {
+      reset1();
+      reset2();
+   };
+
+
+   reset12();
+
+   // max reduction
+   auto maxHost = [&]() {
+      resultHost = hostVector.max();
+   };
+   auto maxHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionMax< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto maxCuda = [&]() {
+      resultDevice = deviceVector.max();
+   };
+   benchmark.setOperation( "max", datasetSize );
+   benchmark.time( reset1, "CPU", maxHost );
+   benchmark.time( reset1, "CPU (general)", maxHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", maxCuda );
+#endif
+
+   // min reduction
+   auto minHost = [&]() {
+      resultHost = hostVector.min();
+   };
+   auto minHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionMin< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto minCuda = [&]() {
+      resultDevice = deviceVector.min();
+   };
+   benchmark.setOperation( "min", datasetSize );
+   benchmark.time( reset1, "CPU", minHost );
+   benchmark.time( reset1, "CPU (general)", minHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", minCuda );
+#endif
+
+   // absMax reduction; the cuBLAS variant obtains a position and then reads that element
+   auto absMaxHost = [&]() {
+      resultHost = hostVector.absMax();
+   };
+   auto absMaxHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionAbsMax< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto absMaxCuda = [&]() {
+      resultDevice = deviceVector.absMax();
+   };
+#ifdef HAVE_CUDA
+   auto absMaxCublas = [&]() {
+      int index = 1;   // cuBLAS reports a 1-based position; 1 falls back to element 0 if the call fails
+      cublasIgamax( cublasHandle, size,
+                    deviceVector.getData(), 1,
+                    &index );
+      resultDevice = deviceVector.getElement( index - 1 );
+   };
+#endif
+   benchmark.setOperation( "absMax", datasetSize );
+   benchmark.time( reset1, "CPU", absMaxHost );
+   benchmark.time( reset1, "CPU (general)", absMaxHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", absMaxCuda );
+   benchmark.time( reset1, "cuBLAS", absMaxCublas );
+#endif
+
+   // absMin reduction; the cuBLAS variant obtains a position and then reads that element
+   auto absMinHost = [&]() {
+      resultHost = hostVector.absMin();
+   };
+   auto absMinHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionAbsMin< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto absMinCuda = [&]() {
+      resultDevice = deviceVector.absMin();
+   };
+#ifdef HAVE_CUDA
+   auto absMinCublas = [&]() {
+      int index = 1;   // cuBLAS reports a 1-based position; 1 falls back to element 0 if the call fails
+      cublasIgamin( cublasHandle, size,
+                    deviceVector.getData(), 1,
+                    &index );
+      resultDevice = deviceVector.getElement( index - 1 );
+   };
+#endif
+   benchmark.setOperation( "absMin", datasetSize );
+   benchmark.time( reset1, "CPU", absMinHost );
+   benchmark.time( reset1, "CPU (general)", absMinHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", absMinCuda );
+   benchmark.time( reset1, "cuBLAS", absMinCublas );
+#endif
+
+   // sum reduction
+   auto sumHost = [&]() {
+      resultHost = hostVector.sum();
+   };
+   auto sumHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionSum< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto sumCuda = [&]() {
+      resultDevice = deviceVector.sum();
+   };
+   benchmark.setOperation( "sum", datasetSize );
+   benchmark.time( reset1, "CPU", sumHost );
+   benchmark.time( reset1, "CPU (general)", sumHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", sumCuda );
+#endif
+
+   // l1 norm: lpNorm( 1.0 ), the generic AbsSum reduction and cublasGasum
+   auto l1normHost = [&]() {
+      resultHost = hostVector.lpNorm( 1.0 );
+   };
+   auto l1normHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionAbsSum< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto l1normCuda = [&]() {
+      resultDevice = deviceVector.lpNorm( 1.0 );
+   };
+#ifdef HAVE_CUDA
+   auto l1normCublas = [&]() {
+      cublasGasum( cublasHandle, size,
+                   deviceVector.getData(), 1,
+                   &resultDevice );
+   };
+#endif
+   benchmark.setOperation( "l1 norm", datasetSize );
+   benchmark.time( reset1, "CPU", l1normHost );
+   benchmark.time( reset1, "CPU (general)", l1normHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", l1normCuda );
+   benchmark.time( reset1, "cuBLAS", l1normCublas );
+#endif
+
+   // l2 norm: lpNorm( 2.0 ), the generic L2Norm reduction and cublasGnrm2
+   auto l2normHost = [&]() {
+      resultHost = hostVector.lpNorm( 2.0 );
+   };
+   auto l2normHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionL2Norm< Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto l2normCuda = [&]() {
+      resultDevice = deviceVector.lpNorm( 2.0 );
+   };
+#ifdef HAVE_CUDA
+   auto l2normCublas = [&]() {
+      cublasGnrm2( cublasHandle, size,
+                   deviceVector.getData(), 1,
+                   &resultDevice );
+   };
+#endif
+   benchmark.setOperation( "l2 norm", datasetSize );
+   benchmark.time( reset1, "CPU", l2normHost );
+   benchmark.time( reset1, "CPU (general)", l2normHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", l2normCuda );
+   benchmark.time( reset1, "cuBLAS", l2normCublas );
+#endif
+
+   // l3 norm: lpNorm( 3.0 ) and the generic LpNorm reduction with power 3
+   auto l3normHost = [&]() {
+      resultHost = hostVector.lpNorm( 3.0 );
+   };
+   auto l3normHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionLpNorm< Real > operation;
+      operation.setPower( 3.0 );
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              ( Real* ) 0,
+              result );
+      return result;
+   };
+   auto l3normCuda = [&]() {
+      resultDevice = deviceVector.lpNorm( 3.0 );
+   };
+   benchmark.setOperation( "l3 norm", datasetSize );
+   benchmark.time( reset1, "CPU", l3normHost );
+   benchmark.time( reset1, "CPU (general)", l3normHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", l3normCuda );
+#endif
+
+   // scalar product of the two vectors (two inputs, hence 2 * datasetSize)
+   auto scalarProductHost = [&]() {
+      resultHost = hostVector.scalarProduct( hostVector2 );
+   };
+   auto scalarProductHostGeneral = [&]() {
+      Real result( 0 );
+      Containers::Algorithms::ParallelReductionScalarProduct< Real, Real > operation;
+      Containers::Algorithms::Reduction< Devices::Host >::reduce(
+              operation,
+              hostVector.getSize(),
+              hostVector.getData(),
+              hostVector2.getData(),
+              result );
+      return result;
+   };
+   auto scalarProductCuda = [&]() {
+      resultDevice = deviceVector.scalarProduct( deviceVector2 );
+   };
+#ifdef HAVE_CUDA
+   auto scalarProductCublas = [&]() {
+      cublasGdot( cublasHandle, size,
+                  deviceVector.getData(), 1,
+                  deviceVector2.getData(), 1,
+                  &resultDevice );
+   };
+#endif
+   benchmark.setOperation( "scalar product", 2 * datasetSize );
+   benchmark.time( reset1, "CPU", scalarProductHost );
+   benchmark.time( reset1, "CPU (general)", scalarProductHostGeneral );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", scalarProductCuda );
+   benchmark.time( reset1, "cuBLAS", scalarProductCublas );
+#endif
+
+   /*
+   std::cout << "Benchmarking prefix-sum:" << std::endl;
+   timer.reset();
+   timer.start();
+   hostVector.computePrefixSum();
+   timer.stop();
+   timeHost = timer.getTime();
+   bandwidth = 2 * datasetSize / loops / timer.getTime();
+   std::cout << "  CPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;
+
+   timer.reset();
+   timer.start();
+   deviceVector.computePrefixSum();
+   timer.stop();
+   timeDevice = timer.getTime();
+   bandwidth = 2 * datasetSize / loops / timer.getTime();
+   std::cout << "  GPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;
+   std::cout << "  CPU/GPU speedup: " << timeHost / timeDevice << std::endl;
+
+   HostVector auxHostVector;
+   auxHostVector.setLike( deviceVector );
+   auxHostVector = deviceVector;
+   for( int i = 0; i < size; i++ )
+      if( hostVector.getElement( i ) != auxHostVector.getElement( i ) )
+      {
+         std::cerr << "Error in prefix sum at position " << i << ":  " << hostVector.getElement( i ) << " != " << auxHostVector.getElement( i ) << std::endl;
+      }
+   */
+
+   // scalar multiplication of the first vector by 0.5 (cuBLAS: cublasGscal)
+   auto multiplyHost = [&]() {
+      hostVector *= 0.5;
+   };
+   auto multiplyCuda = [&]() {
+      deviceVector *= 0.5;
+   };
+#ifdef HAVE_CUDA
+   auto multiplyCublas = [&]() {
+      const Real alpha = 0.5;
+      cublasGscal( cublasHandle, size,
+                   &alpha,
+                   deviceVector.getData(), 1 );
+   };
+#endif
+   benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
+   benchmark.time( reset1, "CPU", multiplyHost );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", multiplyCuda );
+   benchmark.time( reset1, "cuBLAS", multiplyCublas );
+#endif
+
+   // vector addition: the second vector is added to the first (cuBLAS: cublasGaxpy with alpha = 1)
+   auto addVectorHost = [&]() {
+      hostVector.addVector( hostVector2 );
+   };
+   auto addVectorCuda = [&]() {
+      deviceVector.addVector( deviceVector2 );
+   };
+#ifdef HAVE_CUDA
+   auto addVectorCublas = [&]() {
+      const Real alpha = 1.0;
+      cublasGaxpy( cublasHandle, size,
+                   &alpha,
+                   deviceVector2.getData(), 1,
+                   deviceVector.getData(), 1 );
+   };
+#endif
+   benchmark.setOperation( "vector addition", 3 * datasetSize );
+   benchmark.time( reset1, "CPU", addVectorHost );
+#ifdef HAVE_CUDA
+   benchmark.time( reset1, "GPU", addVectorCuda );
+   benchmark.time( reset1, "cuBLAS", addVectorCublas );
+#endif
+
+
+#ifdef HAVE_CUDA
+   cublasDestroy( cublasHandle );
+#endif
+
+   return true;
+}
+
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
new file mode 100644
index 0000000000000000000000000000000000000000..60decddc8c04c9422451b1581f8db044655a93ba
--- /dev/null
+++ b/src/Benchmarks/Benchmarks.h
@@ -0,0 +1,456 @@
+/***************************************************************************
+                          benchmarks.h  -  description
+                             -------------------
+    begin                : Dec 30, 2015
+    copyright            : (C) 2015 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <vector>
+
+#include <TNL/Timer.h>
+#include <TNL/String.h>
+#include <TNL/Solvers/IterativeSolverMonitor.h>
+
+namespace TNL {
+namespace Benchmarks {
+
+const double oneGB = 1024.0 * 1024.0 * 1024.0;
+
+// Calls compute() once as an untimed warm-up and then `loops` more times with
+// the timer running; reset() precedes every call and is never timed. The value
+// returned is timer.getRealTime() read after the final iteration.
+template< typename ComputeFunction,
+          typename ResetFunction,
+          typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
+double
+timeFunction( ComputeFunction compute,
+              ResetFunction reset,
+              int loops,
+              Monitor && monitor = Monitor() )
+{
+   // default-constructed timer, handed to the monitor before any work is done
+   Timer stopwatch;
+   monitor.setTimer( stopwatch );
+
+   // untimed warm-up run
+   reset();
+   compute();
+
+   // the monitor's "time" field is reused to carry the 1-based run number
+   for( int run = 0; run < loops; ++run ) {
+      monitor.setTime( run + 1 );
+      reset();
+
+      // synchronize the CUDA device on both sides of the timed region
+      // TODO: not necessary for host computations
+#ifdef HAVE_CUDA
+      cudaDeviceSynchronize();
+#endif
+      stopwatch.start();
+      compute();
+#ifdef HAVE_CUDA
+      cudaDeviceSynchronize();
+#endif
+      stopwatch.stop();
+   }
+
+   return stopwatch.getRealTime();
+}
+
+
+// Collects benchmark output: when `verbose` it echoes a formatted table to
+// std::cout, and it always appends a line-oriented record to an in-memory log
+// that save() writes to a caller-supplied stream.
+// NOTE(review): keys are `const char*`, so MetadataMap is ordered by pointer value, not by text.
+class Logging
+{
+public:
+   using MetadataElement = std::pair< const char*, String >;
+   using MetadataMap = std::map< const char*, String >;
+   using MetadataColumns = std::vector<MetadataElement>;
+
+   using HeaderElements = std::initializer_list< String >;
+   using RowElements = std::initializer_list< double >;
+
+   Logging( bool verbose = true )
+   : verbose(verbose)
+   {}
+   // Echoes the title banner to stdout (verbose only) and logs it as ": title = <title>".
+   void
+   writeTitle( const String & title )
+   {
+      if( verbose )
+         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
+      log << ": title = " << title << std::endl;
+   }
+   // Logs every metadata pair as ": <key> = <value>"; verbose mode also lists them on stdout.
+   void
+   writeMetadata( const MetadataMap & metadata )
+   {
+      if( verbose )
+         std::cout << "properties:" << std::endl;
+
+      for( auto & it : metadata ) {
+         if( verbose )
+            std::cout << "   " << it.first << " = " << it.second << std::endl;
+         log << ": " << it.first << " = " << it.second << std::endl;
+      }
+      if( verbose )
+         std::cout << std::endl;
+   }
+   // Logs the "!"-indented header lines for one result cell; verbose mode prints the column titles once per header change.
+   void
+   writeTableHeader( const String & spanningElement,
+                     const HeaderElements & subElements )
+   {
+      if( verbose && header_changed ) {
+         for( auto & it : metadataColumns ) {
+            std::cout << std::setw( 20 ) << it.first;
+         }
+
+         // spanning element is printed as usual column to stdout,
+         // but is excluded from header
+         std::cout << std::setw( 15 ) << "";
+
+         for( auto & it : subElements ) {
+            std::cout << std::setw( 15 ) << it;
+         }
+         std::cout << std::endl;
+
+         header_changed = false;
+      }
+
+      // initial indent string
+      header_indent = "!";
+      log << std::endl;
+      for( auto & it : metadataColumns ) {
+         log << header_indent << " " << it.first << std::endl;
+      }
+
+      // dump stacked spanning columns
+      // (emptiness is re-tested on every pass: popping may empty the stack, and back() on an empty vector is undefined)
+      while( ! horizontalGroups.empty() && horizontalGroups.back().second <= 0 ) {
+         horizontalGroups.pop_back();
+         if( ! header_indent.empty() ) header_indent.pop_back();
+      }
+      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
+         if( horizontalGroups[ i ].second > 0 ) {
+            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
+            header_indent += "!";
+         }
+      }
+
+      log << header_indent << " " << spanningElement << std::endl;
+      for( auto & it : subElements ) {
+         log << header_indent << "! " << it << std::endl;
+      }
+
+      if( horizontalGroups.size() > 0 ) {
+         horizontalGroups.back().second--;
+         header_indent.pop_back();
+      }
+   }
+   // Writes one result row: metadata values, then the sub-column values; a value of exactly 0.0 is rendered as "N/A".
+   void
+   writeTableRow( const String & spanningElement,
+                  const RowElements & subElements )
+   {
+      if( verbose ) {
+         for( auto & it : metadataColumns ) {
+            std::cout << std::setw( 20 ) << it.second;
+         }
+         // spanning element is printed as usual column to stdout
+         std::cout << std::setw( 15 ) << spanningElement;
+         for( auto & it : subElements ) {
+            std::cout << std::setw( 15 );
+            if( it != 0.0 )std::cout << it;
+            else std::cout << "N/A";
+         }
+         std::cout << std::endl;
+      }
+
+      // only when changed (the header has been already adjusted)
+      // print each element on separate line
+      for( auto & it : metadataColumns ) {
+         log << it.second << std::endl;
+      }
+
+      // benchmark data are indented
+      const String indent = "    ";
+      for( auto & it : subElements ) {
+         if( it != 0.0 ) log << indent << it << std::endl;
+         else log << indent << "N/A" << std::endl;
+      }
+   }
+   // Logs `msg` in place of a result row and takes `colspan` sub-columns off the last spanning group.
+   void
+   writeErrorMessage( const char* msg,
+                      int colspan = 1 )
+   {
+      // initial indent string
+      header_indent = "!";
+      log << std::endl;
+      for( auto & it : metadataColumns ) {
+         log << header_indent << " " << it.first << std::endl;
+      }
+
+      // make sure there is a header column for the message
+      if( horizontalGroups.size() == 0 )
+         horizontalGroups.push_back( {"", 1} );
+
+      // dump stacked spanning columns (guarded: back() on an empty vector is undefined)
+      while( ! horizontalGroups.empty() && horizontalGroups.back().second <= 0 ) {
+         horizontalGroups.pop_back();
+         if( ! header_indent.empty() ) header_indent.pop_back();
+      }
+      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
+         if( horizontalGroups[ i ].second > 0 ) {
+            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
+            header_indent += "!";
+         }
+      }
+      if( horizontalGroups.size() > 0 ) {
+         horizontalGroups.back().second -= colspan;
+         header_indent.pop_back();
+      }
+
+      // only when changed (the header has been already adjusted)
+      // print each element on separate line
+      for( auto & it : metadataColumns ) {
+         log << it.second << std::endl;
+      }
+      log << msg << std::endl;
+   }
+   // Ends the current table: adds a separator line to the log and resets indentation, header state and spanning groups.
+   void
+   closeTable()
+   {
+      log << std::endl;
+      header_indent = body_indent = "";
+      header_changed = true;
+      horizontalGroups.clear();
+   }
+   // Closes the table, writes the accumulated log to `logFile` and, if the stream is still good, empties the log.
+   bool save( std::ostream & logFile )
+   {
+      closeTable();
+      logFile << log.str();
+      if( logFile.good() ) {
+         log.str( "" );   // str() without arguments returns a copy, so assigning to it cleared nothing
+         return true;
+      }
+      return false;
+   }
+
+protected:
+
+   // manual double -> String conversion with fixed precision
+   static String
+   _to_string( double num, int precision = 0, bool fixed = false )
+   {
+      std::stringstream str;
+      if( fixed )
+         str << std::fixed;
+      if( precision )
+         str << std::setprecision( precision );
+      str << num;
+      return String( str.str().data() );
+   }
+
+   std::stringstream log;       // accumulated log text, written out by save()
+   std::string header_indent;   // current run of "!" marking header nesting in the log
+   std::string body_indent;
+
+   bool verbose;
+   MetadataColumns metadataColumns;
+   bool header_changed = true;  // true until the stdout column titles have been printed
+   std::vector< std::pair< String, int > > horizontalGroups;  // (name, remaining sub-columns)
+};
+
+
+// Front end for running benchmarks: times compute functions with timeFunction()
+// and records the results through the inherited Logging facilities.
+class Benchmark
+: protected Logging
+{
+public:
+   using Logging::MetadataElement;
+   using Logging::MetadataMap;
+   using Logging::MetadataColumns;
+
+   Benchmark( int loops = 10,
+              bool verbose = true )
+   : Logging(verbose), loops(loops)
+   {}
+
+   // Changes the number of loops passed to timeFunction() by later time() calls.
+   // TODO: ensure that this is not called in the middle of the benchmark
+   // (or just remove it completely?)
+   void
+   setLoops( int loops )
+   {
+      this->loops = loops;
+   }
+
+   // Marks the start of a new benchmark: closes the previous table, writes the
+   // title and passes it to the monitor as the current stage.
+   void
+   newBenchmark( const String & title )
+   {
+      closeTable();
+      writeTitle( title );
+      monitor.setStage( title.getString() );
+   }
+
+   // Marks the start of a new benchmark (with custom metadata); the loop count
+   // is stored under the "loops" key before the metadata is written.
+   void
+   newBenchmark( const String & title,
+                 MetadataMap metadata )
+   {
+      newBenchmark( title );
+      metadata["loops"] = String(loops);
+      writeMetadata( metadata );
+   }
+
+   // Sets metadata columns -- values used for all subsequent rows until
+   // the next call to this function. Marks the header as changed when the
+   // new columns differ from the current ones.
+   void
+   setMetadataColumns( const MetadataColumns & metadata )
+   {
+      header_changed = header_changed || ( metadataColumns != metadata );
+      metadataColumns = metadata;
+   }
+
+   // TODO: maybe should be renamed to createVerticalGroup and ensured that vertical and horizontal groups are not used within the same "Benchmark"
+   // Sets current operation -- operations expand the table vertically
+   //  - baseTime should be reset to 0.0 for most operations, but sometimes
+   //    it is useful to override it
+   //  - Order of operations inside a "Benchmark" does not matter, rows can be
+   //    easily sorted while converting to HTML.)
+   void
+   setOperation( const String & operation,
+                 const double datasetSize = 0.0, // in GB
+                 const double baseTime = 0.0 )
+   {
+      const bool hasOperationColumn = metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation";
+      if( ! hasOperationColumn )
+         metadataColumns.insert( metadataColumns.begin(), {"operation", operation} );
+      else
+         metadataColumns[ 0 ].second = operation;
+      setOperation( datasetSize, baseTime );
+      header_changed = true;
+   }
+
+   // Sets only the dataset size and the base time used by later time() calls.
+   void
+   setOperation( const double datasetSize = 0.0,
+                 const double baseTime = 0.0 )
+   {
+      this->datasetSize = datasetSize;
+      this->baseTime = baseTime;
+   }
+
+   // Creates new horizontal groups inside a benchmark -- increases the number
+   // of columns in the "Benchmark", implies column spanning.
+   // (Useful e.g. for SpMV formats, different configurations etc.)
+   void
+   createHorizontalGroup( const String & name,
+                          int subcolumns )
+   {
+      // a new level is stacked when there is no group yet, or when the last
+      // group has a different name and still has sub-columns left
+      const bool stackNewGroup = horizontalGroups.size() == 0 ||
+         ( horizontalGroups.back().first != name && horizontalGroups.back().second > 0 );
+      if( stackNewGroup ) {
+         horizontalGroups.push_back( {name, subcolumns} );
+         return;
+      }
+      // otherwise the last group is overwritten in place
+      auto & last = horizontalGroups.back();
+      last.first = name;
+      last.second = subcolumns;
+   }
+
+   // Times a single ComputeFunction. Subsequent calls implicitly split
+   // the current "horizontal group" into sub-columns identified by
+   // "performer", which are further split into "bandwidth", "time" and
+   // "speedup" columns. Also terminates the recursion of the variadic overload.
+   // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation)
+   template< typename ResetFunction,
+             typename ComputeFunction >
+   double
+   time( ResetFunction reset,
+         const String & performer,
+         ComputeFunction & compute )
+   {
+      double elapsed;
+      if( ! verbose ) {
+         elapsed = timeFunction( compute, reset, loops, monitor );
+      }
+      else {
+         // run the monitor main loop
+         Solvers::SolverMonitorThread monitor_thread( monitor );
+         elapsed = timeFunction( compute, reset, loops, monitor );
+      }
+
+      // speedup relates to the base time in effect before this measurement;
+      // a base time of exactly 0.0 is then replaced by this measurement
+      const double bandwidth = datasetSize / elapsed;
+      const double speedup = this->baseTime / elapsed;
+      if( this->baseTime == 0.0 )
+         this->baseTime = elapsed;
+
+      writeTableHeader( performer, HeaderElements({"bandwidth", "time", "speedup"}) );
+      writeTableRow( performer, RowElements({ bandwidth, elapsed, speedup }) );
+      return this->baseTime;
+   }
+
+   // Recursive template function to deal with multiple computations with the
+   // same reset function.
+   template< typename ResetFunction,
+             typename ComputeFunction,
+             typename... NextComputations >
+   inline double
+   time( ResetFunction reset,
+         const String & performer,
+         ComputeFunction & compute,
+         NextComputations & ... nextComputations )
+   {
+      time( reset, performer, compute );
+      time( reset, nextComputations... );
+      return this->baseTime;
+   }
+
+   // Adds an error message to the log. Should be called in places where the
+   // "time" method could not be called (e.g. due to failed allocation).
+   void
+   addErrorMessage( const char* msg,
+                    int numberOfComputations = 1 )
+   {
+      // each computation has 3 subcolumns
+      writeErrorMessage( msg, 3 * numberOfComputations );
+   }
+
+   using Logging::save;
+
+protected:
+   int loops;
+   double datasetSize = 0.0;
+   double baseTime = 0.0;
+   Solvers::IterativeSolverMonitor< double, int > monitor;
+};
+
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e34ade5be305bdee495749a38db1b9af004a6a92
--- /dev/null
+++ b/src/Benchmarks/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_subdirectory( HeatEquation )  # each subdirectory is processed through its own CMakeLists.txt
+add_subdirectory( BLAS )
+add_subdirectory( SpMV )
+add_subdirectory( LinearSolvers )
+# headers of this directory that get installed
+set( headers
+         Benchmarks.h
+)
+# install them into the Benchmarks subdirectory of ${TNL_TARGET_INCLUDE_DIRECTORY} (variable defined outside this file)
+install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h
similarity index 91%
rename from tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h
rename to src/Benchmarks/HeatEquation/BenchmarkLaplace.h
index 51d957f1bf421ecbce3bb8606e6d2b8f0172410a..0a5494e2b157bbfa6ae5164b579d17e1f6aab43b 100644
--- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h
+++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h
@@ -88,6 +88,21 @@ class BenchmarkLaplace< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, Ind
       Real operator()( const MeshFunction& u,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const;
+      
+      template< typename MeshEntity >  // overload that reads the function values from a raw array
+      __cuda_callable__
+      Real operator()( const RealType* u,                   // indexed with entity.getIndex() and its neighbours in the definition
+                       const MeshEntity& entity,            // supplies the centre index and, via getMesh(), the grid data
+                       const RealType& time = 0.0 ) const;  // not read by the definition in BenchmarkLaplace_impl.h
+      
+
+      //template< typename MeshFunction >//, typename MeshEntity >   (non-template overload taking the mesh and raw data explicitly)
+      __cuda_callable__
+      Real operator()( const MeshType& mesh,                                  // supplies the x-dimension and the space-step products
+                       const RealType* u,                                     // raw array of function values
+                       const IndexType& entityIndex,                          // index of the centre value in u
+                       const typename MeshType::CoordinatesType coordinates,  // not read by the definition in BenchmarkLaplace_impl.h
+                       const RealType& time = 0.0 ) const;                    // not read by the definition either
 
       template< typename MeshEntity >
       __cuda_callable__
diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h
similarity index 82%
rename from tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
rename to src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h
index 4e260b0f711851d114f29122e8c02ba299ffcb72..34a2e245aad4fcc17c662cf9b72b009df9a7dcfb 100644
--- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
+++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h
@@ -161,6 +161,68 @@ operator()( const MeshFunction& u,
           ( u[ c - xSize ] - 2.0 * u[ c ] + u[ c + xSize ] ) * hySquareInverse;
 }
 
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+template< typename MeshEntity >
+__cuda_callable__
+Real
+BenchmarkLaplace< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
+operator()( const RealType* u,
+            const MeshEntity& entity,
+            const Real& time ) const
+{
+   /****
+    * Laplace operator approximated by the finite difference method and
+    * evaluated directly on the raw array `u`:
+    *  - the centre value is u[ entity.getIndex() ],
+    *  - its x-neighbours are one element before and after the centre,
+    *  - its y-neighbours are one mesh row (x-dimension of the mesh) away,
+    *  - each second difference is scaled by the matching inverse squared
+    *    space step taken from the mesh.
+    * The `time` argument is not read.
+    *
+    * NOTE(review): the neighbour indices are not range-checked here --
+    * presumably only interior entities are passed in; confirm with callers.
+    */
+   const auto& mesh = entity.getMesh();
+   const IndexType& center = entity.getIndex();
+   const IndexType& rowStride = mesh.getDimensions().x();
+   const RealType& invHxSquared = mesh.template getSpaceStepsProducts< -2, 0 >();
+   const RealType& invHySquared = mesh.template getSpaceStepsProducts< 0, -2 >();
+
+   // `auto` keeps the arithmetic type of the original single expression
+   const auto d2x = ( u[ center - 1 ] - 2.0 * u[ center ] + u[ center + 1 ] ) * invHxSquared;
+   const auto d2y = ( u[ center - rowStride ] - 2.0 * u[ center ] + u[ center + rowStride ] ) * invHySquared;
+   return d2x + d2y;
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+   // (non-template overload: the mesh and the raw data pointer are passed explicitly)
+__cuda_callable__
+Real
+BenchmarkLaplace< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
+operator()( const MeshType& mesh,
+            const Real* u,
+            const IndexType& entityIndex,
+            const typename MeshType::CoordinatesType coordinates,
+            const RealType& time ) const
+{
+   // finite-difference Laplace stencil addressed by a plain entity index; `coordinates` and `time` are not read
+   const IndexType& rowStride = mesh.getDimensions().x();
+   const RealType& invHxSquared = mesh.template getSpaceStepsProducts< -2, 0 >();
+   const RealType& invHySquared = mesh.template getSpaceStepsProducts< 0, -2 >();
+   const auto d2x = ( u[ entityIndex - 1 ] - 2.0 * u[ entityIndex ] + u[ entityIndex + 1 ] ) * invHxSquared;
+   const auto d2y = ( u[ entityIndex - rowStride ] - 2.0 * u[ entityIndex ] + u[ entityIndex + rowStride ] ) * invHySquared;
+   return d2x + d2y;
+}
+
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
diff --git a/tests/benchmarks/heat-equation-benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquation/CMakeLists.txt
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/CMakeLists.txt
rename to src/Benchmarks/HeatEquation/CMakeLists.txt
diff --git a/src/Benchmarks/HeatEquation/DirichletBoundaryConditions.h b/src/Benchmarks/HeatEquation/DirichletBoundaryConditions.h
new file mode 100644
index 0000000000000000000000000000000000000000..32c36343f2d5f06fd95f6d008aabd9d299884599
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/DirichletBoundaryConditions.h
@@ -0,0 +1,142 @@
+/***************************************************************************
+                          DirichletBoundaryConditions.h  -  description
+                             -------------------
+    begin                : Nov 17, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Operators/Operator.h>
+#include <TNL/Functions/Analytic/Constant.h>
+#include <TNL/Functions/FunctionAdapter.h>
+#include <TNL/Functions/MeshFunction.h>
+
+namespace TNL {
+
+// Dirichlet boundary conditions used by the heat-equation benchmark: boundary
+// values are obtained from `Function` through Functions::FunctionAdapter.
+template< typename Mesh,
+          typename Function = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType >,
+          int MeshEntitiesDimension = Mesh::getMeshDimension(),
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::GlobalIndexType >
+class DirichletBoundaryConditions
+: public Operators::Operator< Mesh,
+                              Functions::MeshBoundaryDomain,
+                              MeshEntitiesDimension,
+                              MeshEntitiesDimension,
+                              Real,
+                              Index >
+{
+   public:
+
+      typedef Mesh MeshType;
+      typedef Function FunctionType;
+      typedef Real RealType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef Index IndexType;
+
+      typedef Pointers::SharedPointer<  Mesh > MeshPointer;
+      typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef typename MeshType::PointType PointType;
+
+      static constexpr int getMeshDimension() { return MeshType::getMeshDimension(); }
+
+      static void configSetup( Config::ConfigDescription& config,
+                               const String& prefix = "" )
+      {
+         Function::configSetup( config, prefix );
+      }
+      // Sets up the boundary function from `parameters` via Functions::FunctionAdapter and returns its result.
+      bool setup( const MeshPointer& meshPointer,
+                  const Config::ParameterContainer& parameters,
+                  const String& prefix = "" )
+      {
+         return Functions::FunctionAdapter< MeshType, FunctionType >::template setup< MeshPointer >( this->function, meshPointer, parameters, prefix );
+      }
+
+      void setFunction( const Function& function )
+      {
+         this->function = function;
+      }
+
+      Function& getFunction()
+      {
+         return this->function;
+      }
+
+      const Function& getFunction() const
+      {
+         return this->function;
+      }
+      // Boundary value at `entity` and `time`, taken from the boundary function; `u` is not read.
+      template< typename EntityType,
+                typename MeshFunction >
+      __cuda_callable__
+      const RealType operator()( const MeshFunction& u,
+                                 const EntityType& entity,
+                                 const RealType& time = 0 ) const
+      {
+         // no `template` disambiguator here: getValue is called without an explicit template argument list
+         return Functions::FunctionAdapter< MeshType, Function >::getValue( this->function, entity, time );
+      }
+      // Raw-array overload: ignores all of its arguments and always returns 0.0.
+      __cuda_callable__
+      Real operator()( const MeshType& mesh,
+                       const RealType* u,
+                       const IndexType& entityIndex,
+                       const typename MeshType::CoordinatesType coordinates,
+                       const RealType& time = 0.0 ) const
+      {
+         //static_assert( EntityType::getDimension() == MeshEntitiesDimension, "Wrong mesh entity dimension." );
+         return 0.0; //Functions::FunctionAdapter< MeshType, Function >::template getValue( this->function, entity, time );
+      }
+      // Reports a row length of 1 for every boundary entity.
+      template< typename EntityType >
+      __cuda_callable__
+      IndexType getLinearSystemRowLength( const MeshType& mesh,
+                                          const IndexType& index,
+                                          const EntityType& entity ) const
+      {
+         return 1;
+      }
+      // Sets element 0 of the entity's matrix row to 1.0 in the entity's own column and stores the boundary value in b.
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
+      __cuda_callable__
+      void setMatrixElements( const PreimageFunction& u,
+                              const MeshEntity& entity,
+                              const RealType& time,
+                              const RealType& tau,
+                              Matrix& matrix,
+                              Vector& b ) const
+      {
+         typename Matrix::MatrixRow matrixRow = matrix.getRow( entity.getIndex() );
+         const IndexType& index = entity.getIndex();
+         matrixRow.setElement( 0, index, 1.0 );
+         b[ index ] = Functions::FunctionAdapter< MeshType, Function >::getValue( this->function, entity, time );
+      }
+
+   protected:
+
+      Function function;
+
+   //static_assert( Device::DeviceType == Function::Device::DeviceType );
+};
+
+
+// Streams a short description of the boundary conditions followed by the boundary function (assumes Function is streamable).
+template< typename Mesh, typename Function >
+std::ostream& operator << ( std::ostream& str, const DirichletBoundaryConditions< Mesh, Function >& bc )
+{
+   str << "Dirichlet boundary conditions: function = " << bc.getFunction();
+   return str;
+}
+
+} // namespace TNL
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkBuildConfigTag.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkBuildConfigTag.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkBuildConfigTag.h
rename to src/Benchmarks/HeatEquation/HeatEquationBenchmarkBuildConfigTag.h
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h
similarity index 78%
rename from tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h
rename to src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h
index b80597e0421219fe298fd8d888c96c14ec28d92f..998be646d6a296c272f69560ebe75ec507e98dd2 100644
--- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h
+++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h
@@ -4,6 +4,7 @@
 #include <TNL/Problems/PDEProblem.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Solvers/PDE/ExplicitUpdater.h>
+#include "Tuning/ExplicitUpdater.h"
 
 using namespace TNL;
 using namespace TNL::Problems;
@@ -11,8 +12,8 @@ using namespace TNL::Problems;
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 class HeatEquationBenchmarkProblem:
    public PDEProblem< Mesh,
                       Communicator,
@@ -26,11 +27,11 @@ class HeatEquationBenchmarkProblem:
       typedef typename Mesh::DeviceType DeviceType;
       typedef typename DifferentialOperator::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
       typedef PDEProblem< Mesh, Communicator, RealType, DeviceType, IndexType > BaseType;
-      typedef SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       
       typedef Communicator CommunicatorType;
 
@@ -68,6 +69,9 @@ class HeatEquationBenchmarkProblem:
                               const RealType& tau,
                               DofVectorPointer& _uPointer,
                               DofVectorPointer& _fuPointer );
+      
+      void applyBoundaryConditions( const RealType& time,
+                                       DofVectorPointer& dofs );        
 
       template< typename MatrixPointer >
       void assemblyLinearSystem( const RealType& time,
@@ -93,7 +97,8 @@ class HeatEquationBenchmarkProblem:
       RightHandSide* cudaRightHandSide;
       DifferentialOperator* cudaDifferentialOperator;
       
-      Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater;
+      TNL::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > tuningExplicitUpdater;
+      TNL::Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater;
       
 };
 
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h
similarity index 65%
rename from tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
rename to src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h
index 0aa2094f650c8e72812d900b801bce0fed35aaca..d4587e75f0f851f555a71cd17649e8ec69e70f76 100644
--- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
+++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h
@@ -5,15 +5,24 @@
 #include <TNL/Matrices/MatrixSetter.h>
 #include <TNL/Solvers/PDE/LinearSystemAssembler.h>
 #include <TNL/Solvers/PDE/BackwardTimeDiscretisation.h>
+#include <TNL/Solvers/PDE/ExplicitUpdater.h>
 #include "TestGridEntity.h"
+#include "Tuning/tunning.h"
+#include "Tuning/SimpleCell.h"
+#include "Tuning/GridTraverser.h"
+
+//#define WITH_TNL  // In the 'tunning' part, this serves for comparison of performance 
+                  // when using common TNL structures compared to the benchmark ones
+
+
 
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 String
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::  // the returned type name mentions Mesh::getType() only
 getType()
 {
    return String( "HeatEquationBenchmarkProblem< " ) + Mesh :: getType() + " >";
 }
@@ -22,9 +31,9 @@ getType()
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+          typename DifferentialOperator,
+          typename Communicator >
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 HeatEquationBenchmarkProblem()
 : cudaMesh( 0 ),
   cudaBoundaryConditions( 0 ),
@@ -36,22 +45,29 @@ HeatEquationBenchmarkProblem()
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 String
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 getPrologHeader() const
 {
-   return String( "Heat Equation Benchmark" );
+   // the prolog header names the configured CUDA kernel type
+   if( this->cudaKernelType == "pure-c" ) return String( "Heat Equation Benchmark PURE-C test" );
+   if( this->cudaKernelType == "templated" ) return String( "Heat Equation Benchmark TEMPLATED test" );
+   if( this->cudaKernelType == "templated-compact" ) return String( "Heat Equation Benchmark TEMPLATED COMPACT test" );
+   if( this->cudaKernelType == "tunning" ) return String( "Heat Equation Benchmark TUNNIG test" );
+   // any other kernel type gets the generic header: flowing off the end of a
+   // non-void function is undefined behaviour
+   return String( "Heat Equation Benchmark" );
 }
 
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 void
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 writeProlog( Logger& logger, const Config::ParameterContainer& parameters ) const
 {
    /****
@@ -63,10 +79,10 @@ writeProlog( Logger& logger, const Config::ParameterContainer& parameters ) cons
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 bool
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
@@ -81,17 +97,19 @@ setup( const Config::ParameterContainer& parameters,
       this->cudaRightHandSide = Devices::Cuda::passToDevice( *this->rightHandSidePointer );
       this->cudaDifferentialOperator = Devices::Cuda::passToDevice( *this->differentialOperatorPointer );
    }
-   
+   this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer );
+   this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer );
+   this->explicitUpdater.setRightHandSide( this->rightHandSidePointer );   
    return true;
 }
 
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-typename HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::IndexType
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+          typename DifferentialOperator,
+          typename Communicator >
+typename HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::IndexType
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 getDofs() const
 {
    /****
@@ -104,21 +122,22 @@ getDofs() const
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 void
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 bindDofs( DofVectorPointer& dofsPointer )
 {
+   this->u->bind( this->getMesh(), dofsPointer );  // bind the mesh function u to this problem's mesh and the given DOF vector
 }
 
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 bool
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 setInitialCondition( const Config::ParameterContainer& parameters,
                      DofVectorPointer& dofsPointer )
 {
@@ -135,11 +154,11 @@ setInitialCondition( const Config::ParameterContainer& parameters,
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
    template< typename Matrix >
 bool
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 setupLinearSystem( Matrix& matrix )
 {
    const IndexType dofs = this->getDofs();
@@ -161,10 +180,10 @@ setupLinearSystem( Matrix& matrix )
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 bool
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 makeSnapshot( const RealType& time,
               const IndexType& step,
               DofVectorPointer& dofsPointer )
@@ -327,7 +346,6 @@ template< typename GridType,
           typename GridEntity,
           typename DifferentialOperator,
           typename RightHandSide,
-          typename Communicator,
           typename MeshFunction >
 __global__ void 
 heatEquationTemplatedCompact( const GridType* grid,
@@ -376,10 +394,10 @@ heatEquationTemplatedCompact( const GridType* grid,
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
 void
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 getExplicitUpdate( const RealType& time,
                    const RealType& tau,
                    DofVectorPointer& uDofs,
@@ -442,17 +460,7 @@ getExplicitUpdate( const RealType& time,
                             gridYSize / 16 + ( gridYSize % 16 != 0 ) );
 
          int cudaErr;
-         boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( uDofs->getData(), fuDofs->getData(), gridXSize, gridYSize );
-         if( ( cudaErr = cudaGetLastError() ) != cudaSuccess )
-         {
-            std::cerr << "Setting of boundary conditions failed. " << cudaErr << std::endl;
-            return;
-         }
 
-         /****
-          * Laplace operator
-          */
-         //cout << "Laplace operator ... " <<std::endl;
          heatEquationKernel<<< cudaGridSize, cudaBlockSize >>>
             ( uDofs->getData(), fuDofs->getData(), tau, hx_inv, hy_inv, gridXSize, gridYSize );
          if( cudaGetLastError() != cudaSuccess )
@@ -460,6 +468,14 @@ getExplicitUpdate( const RealType& time,
             std::cerr << "Laplace operator failed." << std::endl;
             return;
          }
+         
+         boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( uDofs->getData(), fuDofs->getData(), gridXSize, gridYSize );
+         if( ( cudaErr = cudaGetLastError() ) != cudaSuccess )
+         {
+            std::cerr << "Setting of boundary conditions failed. " << cudaErr << std::endl;
+            return;
+         }
+         
       }
       if( this->cudaKernelType == "templated-compact" )
       {
@@ -520,7 +536,6 @@ getExplicitUpdate( const RealType& time,
          cudaThreadSynchronize();         
          TNL_CHECK_CUDA_DEVICE;
       }
-      #endif
       if( this->cudaKernelType == "templated" )
       {
          //if( !this->cudaMesh )
@@ -528,23 +543,198 @@ getExplicitUpdate( const RealType& time,
          this->u->bind( mesh, uDofs );
          this->fu->bind( mesh, fuDofs );         
          //explicitUpdater.setGPUTransferTimer( this->gpuTransferTimer ); 
-         explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer );
-         explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer );
-         explicitUpdater.setRightHandSide( this->rightHandSidePointer );
-         
          this->explicitUpdater.template update< typename Mesh::Cell, CommunicatorType >( time, tau, mesh, this->u, this->fu );
       }
+      if( this->cudaKernelType == "tunning" )
+      {
+         if( std::is_same< DeviceType, Devices::Cuda >::value )
+         {   
+            this->u->bind( mesh, uDofs );
+            this->fu->bind( mesh, fuDofs );                     
+            
+            
+            /*this->explicitUpdater.template update< typename Mesh::Cell >( time, tau, mesh, this->u, this->fu );
+            return;*/
+            
+#ifdef WITH_TNL
+            using ExplicitUpdaterType = TNL::Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >;
+            using Cell = typename MeshType::Cell;
+            using MeshTraverserType = Meshes::Traverser< MeshType, Cell >;
+            using UserData = TNL::Solvers::PDE::ExplicitUpdaterTraverserUserData< RealType,
+               MeshFunctionType,
+               DifferentialOperator,
+               BoundaryCondition,
+               RightHandSide >;
+            
+#else
+            //using CellConfig = Meshes::GridEntityNoStencilStorage;
+            using CellConfig = Meshes::GridEntityCrossStencilStorage< 1 >;
+            using ExplicitUpdaterType = ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >;
+            using Cell = typename MeshType::Cell; 
+            //using Cell = SimpleCell< Mesh, CellConfig >;
+            using MeshTraverserType = Traverser< MeshType, Cell >;
+            using UserData = ExplicitUpdaterTraverserUserData< RealType,
+               MeshFunctionType,
+               DifferentialOperator,
+               BoundaryCondition,
+               RightHandSide >;
+#endif            
+
+            using InteriorEntitiesProcessor = typename ExplicitUpdaterType::TraverserInteriorEntitiesProcessor;
+            using BoundaryEntitiesProcessor = typename ExplicitUpdaterType::TraverserBoundaryEntitiesProcessor;
+            
+            UserData userData;
+            userData.time = time;
+            userData.differentialOperator = &this->differentialOperatorPointer.template getData< Devices::Cuda >();
+            userData.boundaryConditions = &this->boundaryConditionPointer.template getData< Devices::Cuda >();
+            userData.rightHandSide = &this->rightHandSidePointer.template getData< Devices::Cuda >();
+            userData.u = &this->u.template modifyData< Devices::Cuda >(); //uDofs->getData();
+            userData.fu = &this->fu.template modifyData< Devices::Cuda >(); //fuDofs->getData();
+#ifndef WITH_TNL
+            userData.real_u = uDofs->getData();
+            userData.real_fu = fuDofs->getData();
+#endif                        
+            const IndexType gridXSize = mesh->getDimensions().x();
+            const IndexType gridYSize = mesh->getDimensions().y();
+            dim3 cudaBlockSize( 16, 16 );
+            dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ),
+                               gridYSize / 16 + ( gridYSize % 16 != 0 ) );
+            
+            TNL::Devices::Cuda::synchronizeDevice();
+            int cudaErr;
+            Meshes::Traverser< MeshType, Cell > meshTraverser;
+            meshTraverser.template processInteriorEntities< UserData,
+                                                      InteriorEntitiesProcessor >
+                                                          ( mesh,
+                                                            userData );
+             // */
+            /*_heatEquationKernel< InteriorEntitiesProcessor, UserData, MeshType, RealType, IndexType >
+            <<< cudaGridSize, cudaBlockSize >>>
+               ( &mesh.template getData< Devices::Cuda >(),
+                userData );
+                //&userDataPtr.template modifyData< Devices::Cuda >() );*/
+            if( cudaGetLastError() != cudaSuccess )
+            {
+               std::cerr << "Laplace operator failed." << std::endl;
+               return;
+            }
+            
+            meshTraverser.template processBoundaryEntities< UserData,
+                                                      BoundaryEntitiesProcessor >
+                                                          ( mesh,
+                                                            userData );
+            // */
+           /*_boundaryConditionsKernel< BoundaryEntitiesProcessor, UserData, MeshType, RealType, IndexType >
+            <<< cudaGridSize, cudaBlockSize >>>
+               ( &mesh.template getData< Devices::Cuda >(),
+                userData );
+                //&userDataPtr.template modifyData< Devices::Cuda >() );
+            // */ 
+            if( ( cudaErr = cudaGetLastError() ) != cudaSuccess )
+            {
+               std::cerr << "Setting of boundary conditions failed. " << cudaErr << std::endl;
+               return;
+            }
+
+            
+            
+         }
+      }      
+      #endif
    }
 }
 
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
+          typename DifferentialOperator,
+          typename Communicator >
+void 
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
+applyBoundaryConditions( const RealType& time,
+                            DofVectorPointer& uDofs )
+{  // Applies the boundary conditions to uDofs at `time`; what is actually done depends on cudaKernelType.
+   const MeshPointer& mesh = this->getMesh();  // only referenced by the commented-out code below
+   if( this->cudaKernelType == "templated" )
+   {
+      this->bindDofs( uDofs );
+      this->explicitUpdater.template applyBoundaryConditions< typename Mesh::Cell >( this->getMesh(), time, this->u );
+   }
+   if( this->cudaKernelType == "tunning" )
+   {
+      /*
+      return;
+      this->bindDofs( uDofs );
+      this->explicitUpdater.template applyBoundaryConditions< typename Mesh::Cell >( this->getMesh(), time, this->u );
+      return;*/
+      // NOTE: under HAVE_CUDA everything from the next "/*" through "userData );*/" is a single block comment (the nested "/*" and the #ifdef/#else/#endif lines inside it are inert), and the kernel launch after it is commented out as well, so this branch only compiles the cudaGetLastError() check at its end.
+#ifdef HAVE_CUDA
+/*
+#ifdef WITH_TNL
+      using ExplicitUpdaterType = TNL::Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >;
+      using Cell = typename MeshType::Cell;
+      using MeshTraverserType = Meshes::Traverser< MeshType, Cell >;
+      using UserData = TNL::Solvers::PDE::ExplicitUpdaterTraverserUserData< RealType,
+         MeshFunctionType,
+         DifferentialOperator,
+         BoundaryCondition,
+         RightHandSide >;
+            
+#else
+      //using CellConfig = Meshes::GridEntityNoStencilStorage;
+      using CellConfig = Meshes::GridEntityCrossStencilStorage< 1 >;
+      using ExplicitUpdaterType = ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >;
+      //using Cell = typename MeshType::Cell; 
+      using Cell = SimpleCell< Mesh, CellConfig >;
+      using MeshTraverserType = Traverser< MeshType, Cell >;
+      using UserData = ExplicitUpdaterTraverserUserData< RealType,
+         MeshFunctionType,
+         DifferentialOperator,
+         BoundaryCondition,
+         RightHandSide >;
+#endif            
+         using InteriorEntitiesProcessor = typename ExplicitUpdaterType::TraverserInteriorEntitiesProcessor;
+         using BoundaryEntitiesProcessor = typename ExplicitUpdaterType::TraverserBoundaryEntitiesProcessor;
+
+         UserData userData;
+         userData.time = time;
+         userData.differentialOperator = &this->differentialOperatorPointer.template getData< Devices::Cuda >();
+         userData.rightHandSide = &this->rightHandSidePointer.template getData< Devices::Cuda >();
+         userData.u = &this->u.template modifyData< Devices::Cuda >(); //uDofs->getData();
+#ifndef WITH_TNL
+         userData.real_u = uDofs->getData();
+#endif
+      userData.boundaryConditions = &this->boundaryConditionPointer.template getData< Devices::Cuda >();
+      Meshes::Traverser< MeshType, Cell > meshTraverser;
+      /*meshTraverser.template processBoundaryEntities< UserData,
+                                                BoundaryEntitiesProcessor >
+                                                    ( mesh,
+                                                      userData );*/
+      // */
+      /*_boundaryConditionsKernel< BoundaryEntitiesProcessor, UserData, MeshType, RealType, IndexType >
+      <<< cudaGridSize, cudaBlockSize >>>
+         ( &mesh.template getData< Devices::Cuda >(),
+          userData );
+          //&userDataPtr.template modifyData< Devices::Cuda >() );
+      // */ 
+      int cudaErr;
+      if( ( cudaErr = ::cudaGetLastError() ) != cudaSuccess )
+      {
+         std::cerr << "Setting of boundary conditions failed. " << cudaErr << std::endl;
+         return;
+      }
+#endif
+   }
+}
+
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename DifferentialOperator,
+          typename Communicator >
    template< typename MatrixPointer >
 void
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 assemblyLinearSystem( const RealType& time,
                       const RealType& tau,
                       DofVectorPointer& _u,
@@ -561,7 +751,7 @@ assemblyLinearSystem( const RealType& time,
                              typename DofVectorPointer::ObjectType > systemAssembler;
 
    typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
    MeshFunctionPointer u( this->getMesh(), *_u );
    systemAssembler.setDifferentialOperator( this->differentialOperator );
    systemAssembler.setBoundaryConditions( this->boundaryCondition );
@@ -572,9 +762,9 @@ assemblyLinearSystem( const RealType& time,
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename Communicator,
-          typename DifferentialOperator >
-HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+          typename DifferentialOperator,
+          typename Communicator >
+HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >::
 ~HeatEquationBenchmarkProblem()
 {
    if( this->cudaMesh ) Devices::Cuda::freeFromDevice( this->cudaMesh );
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkRhs.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
rename to src/Benchmarks/HeatEquation/HeatEquationBenchmarkRhs.h
diff --git a/tests/benchmarks/heat-equation-benchmark/TestGridEntity.h b/src/Benchmarks/HeatEquation/TestGridEntity.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/TestGridEntity.h
rename to src/Benchmarks/HeatEquation/TestGridEntity.h
diff --git a/src/Benchmarks/HeatEquation/Tuning/ExplicitUpdater.h b/src/Benchmarks/HeatEquation/Tuning/ExplicitUpdater.h
new file mode 100644
index 0000000000000000000000000000000000000000..62f0c55c903a83eb5749d73a6e36ea59cdeef8a9
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/ExplicitUpdater.h
@@ -0,0 +1,215 @@
+/***************************************************************************
+                          ExplicitUpdater.h  -  description
+                             -------------------
+    begin                : Jul 29, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Functions/FunctionAdapter.h>
+#include <TNL/Timer.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <type_traits>
+#include "Traverser_Grid2D.h"
+
+namespace TNL {
+
+template< typename Real,
+          typename MeshFunction,
+          typename DifferentialOperator,
+          typename BoundaryConditions,
+          typename RightHandSide >
+class ExplicitUpdaterTraverserUserData
+{
+   public:
+      // Plain bundle of the time level and the raw pointers handed to the entity processors.
+      using RealType = Real;
+      using MeshFunctionType = MeshFunction;
+      using DifferentialOperatorType = DifferentialOperator;
+      using BoundaryConditionsType = BoundaryConditions;
+      using RightHandSideType = RightHandSide;
+
+      Real time;
+      // This class neither allocates nor frees any of the pointers below.
+      const DifferentialOperator* differentialOperator;
+
+      const BoundaryConditions* boundaryConditions;
+
+      const RightHandSide* rightHandSide;
+      // Mesh-function views, dereferenced by the entity-based processEntity overloads.
+      MeshFunction *u, *fu;
+      // Raw value arrays, indexed by the index-based processEntity overloads.
+      Real *real_u, *real_fu;
+      // Every pointer starts as NULL, including real_u/real_fu, which used to be left indeterminate.
+      ExplicitUpdaterTraverserUserData()
+      : time( 0.0 ),
+        differentialOperator( NULL ),
+        boundaryConditions( NULL ),
+        rightHandSide( NULL ),
+        u( NULL ), fu( NULL ),
+        real_u( NULL ), real_fu( NULL )
+      {}
+};
+
+
+/**** Explicit updater of the heat-equation tuning benchmark: fills a shared
+ * TraverserUserData record and hands it, with the entity processors below, to a traverser.
+ */
+template< typename Mesh,
+          typename MeshFunction,
+          typename DifferentialOperator,
+          typename BoundaryConditions,
+          typename RightHandSide >
+class ExplicitUpdater
+{
+   public:
+      typedef Mesh MeshType;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
+      typedef typename MeshFunction::RealType RealType;
+      typedef typename MeshFunction::DeviceType DeviceType;
+      typedef typename MeshFunction::IndexType IndexType;
+      typedef ExplicitUpdaterTraverserUserData< RealType, MeshFunction, DifferentialOperator,
+                                                BoundaryConditions, RightHandSide > TraverserUserData;
+      typedef Pointers::SharedPointer<  DifferentialOperator, DeviceType > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer<  BoundaryConditions, DeviceType > BoundaryConditionsPointer;
+      typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer<  MeshFunction, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  TraverserUserData, DeviceType > TraverserUserDataPointer;
+      // The setters store the address of each object's DeviceType-side data in the user data.
+      void setDifferentialOperator( const DifferentialOperatorPointer& differentialOperatorPointer )
+      {
+         this->userDataPointer->differentialOperator = &differentialOperatorPointer.template getData< DeviceType >();
+      }
+
+      void setBoundaryConditions( const BoundaryConditionsPointer& boundaryConditionsPointer )
+      {
+         this->userDataPointer->boundaryConditions = &boundaryConditionsPointer.template getData< DeviceType >();
+      }
+
+      void setRightHandSide( const RightHandSidePointer& rightHandSidePointer )
+      {
+         this->userDataPointer->rightHandSide = &rightHandSidePointer.template getData< DeviceType >();
+      }
+      // Processes the interior entities of EntityType at `time` (u -> fu), then stores time + tau as the current time.
+      template< typename EntityType >
+      void update( const RealType& time,
+                   const RealType& tau,
+                   const MeshPointer& meshPointer,
+                   MeshFunctionPointer& uPointer,
+                   MeshFunctionPointer& fuPointer )
+      {
+         static_assert( std::is_same< MeshFunction,
+                                      Containers::Vector< typename MeshFunction::RealType,
+                                                 typename MeshFunction::DeviceType,
+                                                 typename MeshFunction::IndexType > >::value != true,
+            "Error: I am getting Vector instead of MeshFunction or similar object. You might forget to bind DofVector into MeshFunction in you method getExplicitUpdate."  );
+
+         TNL_ASSERT_TRUE( this->userDataPointer->differentialOperator,
+                          "The differential operator is not correctly set-up. Use method setDifferentialOperator() to do it." );
+         TNL_ASSERT_TRUE( this->userDataPointer->boundaryConditions, 
+                          "The boundary conditions are not correctly set-up. Use method setBoundaryCondtions() to do it." );
+         TNL_ASSERT_TRUE( this->userDataPointer->rightHandSide, 
+                          "The right-hand side is not correctly set-up. Use method setRightHandSide() to do it." );
+         // Publish both views of the data: u/fu as mesh functions, taken from the shared pointer
+         // itself ('.', exactly as the setters above call getData), and real_u/real_fu as raw arrays.
+         this->userDataPointer->time = time;
+         this->userDataPointer->u = &uPointer.template modifyData< DeviceType >();
+         this->userDataPointer->fu = &fuPointer.template modifyData< DeviceType >();
+         this->userDataPointer->real_u = uPointer->getData().getData();
+         this->userDataPointer->real_fu = fuPointer->getData().getData();
+         TNL::Traverser< MeshType, EntityType > meshTraverser;
+         meshTraverser.template processInteriorEntities< TraverserUserData,
+                                                         TraverserInteriorEntitiesProcessor >
+                                                       ( meshPointer,
+                                                         userDataPointer ); // NOTE(review): passes the SharedPointer itself, applyBoundaryConditions() passes *userDataPointer - confirm against Traverser_Grid2D.h
+         this->userDataPointer->time = time + tau;
+      }
+      // Processes the boundary entities of EntityType at `time`, writing the results into uPointer.
+      template< typename EntityType >
+      void applyBoundaryConditions( const MeshPointer& meshPointer,
+                                    const RealType& time,
+                                    MeshFunctionPointer& uPointer )
+      {
+         this->userDataPointer->time = time;
+         // u is a MeshFunction* (the processor evaluates ( *userData.u )( entity )); the raw array belongs in real_u, as in update().
+         this->userDataPointer->u = &uPointer.template modifyData< DeviceType >();
+         this->userDataPointer->real_u = uPointer->getData().getData();
+         Meshes::Traverser< MeshType, EntityType > meshTraverser;
+         meshTraverser.template processBoundaryEntities< TraverserUserData,
+                                             TraverserBoundaryEntitiesProcessor >
+                                           ( meshPointer, *userDataPointer );
+      }
+
+      class TraverserBoundaryEntitiesProcessor
+      {
+         public:
+            // Entity-based variant: overwrites u at the entity with the boundary-condition value.
+            template< typename GridEntity >
+            __cuda_callable__
+            static inline void processEntity( const MeshType& mesh,
+                                              TraverserUserData& userData,
+                                              const GridEntity& entity )
+            {
+               ( *userData.u )( entity ) = ( *userData.boundaryConditions )
+                  ( *userData.u, entity, userData.time );
+            }
+
+            // Index-based variant: writes a constant 0.0 into real_u (the boundary-condition call is commented out).
+            __cuda_callable__
+            static inline void processEntity( const MeshType& mesh,
+                                              TraverserUserData& userData,
+                                              const IndexType& entityIndex,
+                                              const typename MeshType::CoordinatesType& coordinates )
+            {
+               userData.real_u[ entityIndex ] = 0.0; /* ( *userData.boundaryConditions )
+                  ( *userData.u, entity, userData.time );*/
+            }
+
+      };
+
+
+      class TraverserInteriorEntitiesProcessor
+      {
+         public:
+
+            typedef typename MeshType::PointType PointType;
+            // Entity-based variant: fu = differential operator applied to u + right-hand side.
+            template< typename EntityType >
+            __cuda_callable__
+            static inline void processEntity( const MeshType& mesh,
+                                              TraverserUserData& userData,
+                                              const EntityType& entity )
+            {
+               typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter;
+               // No ';' after the operator term: a stray one used to end the statement there and drop the right-hand side.
+               ( *userData.fu )( entity ) =
+                  ( *userData.differentialOperator )( *userData.u, entity, userData.time )
+                  + FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time );
+            }
+            // Index-based variant: operator on the raw arrays only; the right-hand side is not added here.
+            __cuda_callable__
+            static inline void processEntity( const MeshType& mesh,
+                                              TraverserUserData& userData,
+                                              const IndexType& entityIndex,
+                                              const typename MeshType::CoordinatesType& coordinates )
+            {
+               typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter;
+               userData.real_fu[ entityIndex ] = 
+                       ( *userData.differentialOperator )( mesh, userData.real_u, entityIndex, coordinates, userData.time );
+                    //   + 0.0;
+            }
+
+      };
+
+   protected:
+      // Shared record passed to the traverser; filled by the setters and by update()/applyBoundaryConditions().
+      TraverserUserDataPointer userDataPointer;
+
+};
+
+} // namespace TNL
+
diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h
new file mode 100644
index 0000000000000000000000000000000000000000..cdbc4922ca07eda7da0ba442340705f6646d8430
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h
@@ -0,0 +1,108 @@
+/***************************************************************************
+                          GridTraverser.h  -  description
+                             -------------------
+    begin                : Jan 2, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Meshes/Grid.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/CudaStreamPool.h>
+
+namespace TNL {
+
+/****
+ * This is only a helper class for Traverser specializations for Grid. The primary template is empty; the Grid specializations below declare processEntities().
+ */
+template< typename Grid, typename Cell >
+class GridTraverser
+{
+};
+
+/****
+ * 2D grid, Devices::Host: declares the static processEntities() entry point; its definition is in GridTraverser_impl.h, which this header includes at its end.
+ */
+template< typename Real,
+          typename Index,
+          typename Cell >
+class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index >, Cell >
+{
+   public:
+      // Shorthands for the grid specialization and its real/device/index/coordinate types.
+      typedef Meshes::Grid< 2, Real, Devices::Host, Index > GridType;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
+      typedef Real RealType;
+      typedef Devices::Host DeviceType;
+      typedef Index IndexType;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      // NOTE: begin/end are taken by value here, whereas the Devices::Cuda specialization takes them by const reference.
+      template<
+         typename GridEntity,
+         typename EntitiesProcessor,
+         typename UserData,
+         bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary = 1,
+         int YOrthogonalBoundary = 1,
+         typename... GridEntityParameters >
+      static void
+      processEntities(
+         const GridPointer& gridPointer,
+         const CoordinatesType begin,
+         const CoordinatesType end,
+         UserData& userData,
+         // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
+//         const int& stream = 0,
+         const int& stream,
+         // gridEntityParameters are passed to GridEntity's constructor
+         // (i.e. orientation and basis for faces)
+         const GridEntityParameters&... gridEntityParameters );
+};
+
+/****
+ * 2D grid, Devices::Cuda: declares the static processEntities() entry point (declaration only in this header).
+ */
+template< typename Real,
+          typename Index,
+          typename Cell >
+class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index >, Cell >
+{
+   public:
+      // Shorthands for the grid specialization and its real/device/index/coordinate types.
+      typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
+      typedef Real RealType;
+      typedef Devices::Cuda DeviceType;
+      typedef Index IndexType;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      // NOTE: begin/end are const references here, whereas the Devices::Host specialization takes them by value.
+      template<
+         typename GridEntity,
+         typename EntitiesProcessor,
+         typename UserData,
+         bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary = 1,
+         int YOrthogonalBoundary = 1,
+         typename... GridEntityParameters >
+      static void
+      processEntities(
+         const GridPointer& gridPointer,
+         const CoordinatesType& begin,
+         const CoordinatesType& end,
+         UserData& userData,
+         // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
+//         const int& stream = 0,
+         const int& stream,
+         // gridEntityParameters are passed to GridEntity's constructor
+         // (i.e. orientation and basis for faces)
+         const GridEntityParameters&... gridEntityParameters );
+};
+
+} // namespace TNL
+
+#include "GridTraverser_impl.h"
+
diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..f3d9fbeec528dae97e4f3304f44b8440318d4529
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h
@@ -0,0 +1,307 @@
+/***************************************************************************
+                          GridTraverser_impl.h  -  description
+                             -------------------
+    begin                : Jan 2, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Devices/MIC.h>
+
+#pragma once
+
+#include "GridTraverser.h"
+
+#include <TNL/Exceptions/CudaSupportMissing.h>
+
+namespace TNL {
+
+/****
+ * 2D traverser, host: visits entities in begin..end (inclusive); the stream argument is not used here
+ */
+template< typename Real,
+          typename Index, 
+          typename Cell >
+   template<
+      typename GridEntity,
+      typename EntitiesProcessor,
+      typename UserData,
+      bool processOnlyBoundaryEntities,
+      int XOrthogonalBoundary,
+      int YOrthogonalBoundary,
+      typename... GridEntityParameters >
+void
+GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index >, Cell >::
+processEntities(
+   const GridPointer& gridPointer,
+   const CoordinatesType begin,
+   const CoordinatesType end,
+   UserData& userData,
+   const int& stream,
+   const GridEntityParameters&... gridEntityParameters )
+{
+   if( processOnlyBoundaryEntities )
+   {
+      GridEntity entity( *gridPointer, begin, gridEntityParameters... );
+      // Edges y = begin.y()/end.y(), then x = begin.x()/end.x(). NOTE(review): with both flags set, corners are processed twice.
+      if( YOrthogonalBoundary )
+         for( entity.getCoordinates().x() = begin.x();
+              entity.getCoordinates().x() <= end.x();
+              entity.getCoordinates().x() ++ )
+         {
+            entity.getCoordinates().y() = begin.y();
+            entity.refresh();
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
+            entity.getCoordinates().y() = end.y();
+            entity.refresh();
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
+         }
+      if( XOrthogonalBoundary )
+         for( entity.getCoordinates().y() = begin.y();
+              entity.getCoordinates().y() <= end.y();
+              entity.getCoordinates().y() ++ )
+         {
+            entity.getCoordinates().x() = begin.x();
+            entity.refresh();
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
+            entity.getCoordinates().x() = end.x();
+            entity.refresh();
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
+         }
+   }
+   else
+   {
+      //TODO: This does not work with gcc-5.4 and older, should work at gcc 6.x
+/*#pragma omp parallel for firstprivate( entity, begin, end ) if( Devices::Host::isOMPEnabled() )
+      for( entity.getCoordinates().y() = begin.y();
+           entity.getCoordinates().y() <= end.y();
+           entity.getCoordinates().y() ++ )
+         for( entity.getCoordinates().x() = begin.x();
+              entity.getCoordinates().x() <= end.x();
+              entity.getCoordinates().x() ++ )
+         {
+            entity.refresh();
+            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+         }*/
+#ifdef HAVE_OPENMP
+#pragma omp parallel firstprivate( begin, end ) if( Devices::Host::isOMPEnabled() )
+#endif
+      {
+         GridEntity entity( *gridPointer, begin, gridEntityParameters... );
+#ifdef HAVE_OPENMP
+#pragma omp for 
+#endif
+         for( IndexType y = begin.y(); y <= end.y(); y ++ )
+            for( IndexType x = begin.x(); x <= end.x(); x ++ )
+            {
+               entity.getCoordinates().x() = x;
+               entity.getCoordinates().y() = y;
+               entity.refresh();
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
+            }      
+      }
+   }
+}
+
+/****
+ * 2D traverser, CUDA: kernels for a rectangular range (_GridTraverser2D) and its perimeter (_GridTraverser2DBoundary)
+ */
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Index,
+          typename GridEntity,
+          typename UserData,
+          typename EntitiesProcessor,
+          bool processOnlyBoundaryEntities,
+          typename... GridEntityParameters >
+__global__ void 
+_GridTraverser2D(
+   const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
+   UserData* userData,
+   const typename GridEntity::CoordinatesType begin,
+   const typename GridEntity::CoordinatesType end,
+   const dim3 gridIdx,
+   const GridEntityParameters... gridEntityParameters )
+{
+   typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
+   typename GridType::CoordinatesType coordinates;
+   // Each thread takes one coordinate pair: begin plus the thread index reported by the Devices::Cuda helpers
+   coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx );
+   // No GridEntity is built: the processor gets the linear index y * dimensions.x + x and the coordinates
+   if( coordinates <= end )
+   {
+      //GridEntity entity( *grid, coordinates, gridEntityParameters... );
+      //entity.refresh();
+      /*if( ! processOnlyBoundaryEntities || 
+         ( coordinates.x() == 0 || coordinates.y() == 0 ||
+           coordinates.x() == grid->getDimensions().x() - 1 || coordinates.y() == grid->getDimensions().y() - 1 ) )*/
+         //entity.isBoundaryEntity() )
+      {
+         EntitiesProcessor::processEntity
+         ( *grid,
+           *userData,
+           coordinates.y() * grid->getDimensions().x() + coordinates.x(),
+           coordinates
+            );
+      }
+   }
+}
+
+// Boundary kernel: the 1D thread index is split into four consecutive ranges, one per edge of the rectangle
+template< typename Real,
+          typename Index,
+          typename GridEntity,
+          typename UserData,
+          typename EntitiesProcessor,
+          bool processOnlyBoundaryEntities,
+          typename... GridEntityParameters >
+__global__ void 
+_GridTraverser2DBoundary(
+   const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
+   UserData userData,
+   const Index beginX,
+   const Index endX,
+   const Index beginY,
+   const Index endY,
+   const dim3 gridIdx,
+   const GridEntityParameters... gridEntityParameters )
+{
+   using GridType = Meshes::Grid< 2, Real, Devices::Cuda, Index >;
+   using CoordinatesType = typename GridType::CoordinatesType;
+   // The y == beginY and y == endY edges take entitiesAlongX threads each; the x edges take entitiesAlongY - 1 (corners excluded)
+   Index entitiesAlongX = endX - beginX + 1;
+   Index entitiesAlongY = endY - beginY;
+   // NOTE: every else-if below subtracts the previous range length from threadId as a side effect of its condition
+   Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   if( threadId < entitiesAlongX )
+   {
+      GridEntity entity( *grid, 
+         CoordinatesType( beginX + threadId, beginY ),
+         gridEntityParameters... );
+      //printf( "X1: Thread %d -> %d %d x %d %d \n ", threadId, 
+      //   entity.getCoordinates().x(), entity.getCoordinates().y(),
+      //   grid->getDimensions().x(), grid->getDimensions().y() );
+      entity.refresh();
+      EntitiesProcessor::processEntity( *grid, userData, entity );
+   }
+   else if( ( threadId -= entitiesAlongX ) < entitiesAlongX && threadId >= 0 )
+   {
+      GridEntity entity( *grid, 
+         CoordinatesType( beginX + threadId, endY ),
+         gridEntityParameters... );
+      entity.refresh();
+      //printf( "X2: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+      EntitiesProcessor::processEntity( *grid, userData, entity );
+   }
+   else if( ( ( threadId -= entitiesAlongX ) < entitiesAlongY - 1 ) && threadId >= 0 )
+   {
+      GridEntity entity( *grid,
+         CoordinatesType( beginX, beginY + threadId + 1 ),
+      gridEntityParameters... );
+      entity.refresh();
+      //printf( "Y1: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+      EntitiesProcessor::processEntity( *grid, userData, entity );      
+   }
+   else if( ( ( threadId -= entitiesAlongY - 1 ) < entitiesAlongY - 1  ) && threadId >= 0 )
+   {
+      GridEntity entity( *grid,
+         CoordinatesType( endX, beginY + threadId + 1 ),
+      gridEntityParameters... );
+      entity.refresh();
+      //printf( "Y2: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+      EntitiesProcessor::processEntity( *grid, userData, entity );
+   }
+}
+
+#endif
+// CUDA processEntities: boundary-only requests for entity dimension 2 or 0 use the perimeter kernel, all others the range kernel
+template< typename Real,
+          typename Index,
+          typename Cell >
+   template<
+      typename GridEntity,
+      typename EntitiesProcessor,
+      typename UserData,
+      bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary,
+         int YOrthogonalBoundary,
+      typename... GridEntityParameters >
+void
+GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index >, Cell >::
+processEntities(
+   const GridPointer& gridPointer,
+   const CoordinatesType& begin,
+   const CoordinatesType& end,
+   UserData& userData,
+   const int& stream,
+   const GridEntityParameters&... gridEntityParameters )
+{
+#ifdef HAVE_CUDA
+   if( processOnlyBoundaryEntities && 
+       ( GridEntity::getEntityDimension() == 2 || GridEntity::getEntityDimension() == 0 ) )
+   {
+      dim3 cudaBlockSize( 256 );      
+      dim3 cudaBlocksCount, cudaGridsCount;
+      IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 );
+      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
+      dim3 gridIdx, cudaGridSize;
+      Devices::Cuda::synchronizeDevice();
+      for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ )
+      {
+         Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+         _GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
+               <<< cudaGridSize, cudaBlockSize >>>
+               ( &gridPointer.template getData< Devices::Cuda >(),
+                 userData,
+                 begin.x(),
+                 end.x(),
+                 begin.y(),
+                 end.y(),
+                 gridIdx,
+                 gridEntityParameters... );
+      }            
+   }
+   else
+   {
+      dim3 cudaBlockSize( 16, 16 );
+      dim3 cudaBlocksCount, cudaGridsCount;
+      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount,
+                                   end.x() - begin.x() + 1,
+                                   end.y() - begin.y() + 1 );
+      // stream is an index into CudaStreamPool; NOTE(review): the boundary branch above ignores it and passes no stream
+      auto& pool = CudaStreamPool::getInstance();
+      const cudaStream_t& s = pool.getStream( stream );
+
+      Devices::Cuda::synchronizeDevice();
+      dim3 gridIdx, cudaGridSize;
+      for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ )
+         for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ )
+         {
+            Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+	    //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
+            TNL::_GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
+               <<< cudaGridSize, cudaBlockSize, 0, s >>>
+               ( &gridPointer.template getData< Devices::Cuda >(),
+                 &userData,
+                 begin,
+                 end,
+                 gridIdx,
+                 gridEntityParameters... );
+         }
+
+      // only launches with stream index 0 are synchronized here; other stream indices return without waiting
+      if( stream == 0 )
+      {
+         cudaStreamSynchronize( s );
+         TNL_CHECK_CUDA_DEVICE;
+      }
+   }
+#else
+   throw Exceptions::CudaSupportMissing();
+#endif
+}
+
+} // namespace TNL
diff --git a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h
new file mode 100644
index 0000000000000000000000000000000000000000..67254ab3607c9c318f8d6a624387c5d875ee2484
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h
@@ -0,0 +1,144 @@
+/***************************************************************************
+                          SimpleCell.h  -  description
+                             -------------------
+    begin                : Aug 24, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h>
+#include <TNL/Meshes/GridEntityConfig.h>
+
+#define SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE
+// Minimal grid cell (grid reference, coordinates, cached entity index) used by the heat-equation tuning kernels.
+template< typename Grid, typename Config = TNL::Meshes::GridEntityNoStencilStorage >
+class SimpleCell
+{
+   public:
+
+      typedef Grid GridType;
+      typedef GridType MeshType;
+      typedef typename GridType::RealType RealType;
+      typedef typename GridType::IndexType IndexType;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      typedef typename GridType::PointType PointType;
+      typedef SimpleCell< GridType, Config > ThisType;
+      typedef TNL::Meshes::NeighborGridEntitiesStorage< ThisType, Config >
+         NeighborGridEntitiesStorageType;
+      typedef Config ConfigType;
+
+      constexpr static int getMeshDimension() { return GridType::getMeshDimension(); };
+
+      constexpr static int getEntityDimension() { return getMeshDimension(); };
+      // Binds the cell to a grid only: coordinates are default-constructed and entityIndex is not initialized.
+      __cuda_callable__ inline
+      SimpleCell( const GridType& grid )
+      :grid( grid )
+#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE
+      , neighborEntitiesStorage( *this )
+#endif
+      {};
+      // orientation and basis are accepted but ignored; entityIndex stays unset until refresh() is called.
+      __cuda_callable__ inline
+      SimpleCell( const GridType& grid,
+                  const CoordinatesType& coordinates,
+                  const CoordinatesType& orientation = CoordinatesType( ( IndexType ) 0 ),
+                  const CoordinatesType& basis = CoordinatesType( ( IndexType ) 1 ) )
+      : grid( grid ),
+        coordinates( coordinates )
+#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE
+      , neighborEntitiesStorage( *this )
+#endif
+      {};
+
+      __cuda_callable__ inline
+      const CoordinatesType& getCoordinates() const { return this->coordinates; };
+
+      __cuda_callable__ inline
+      CoordinatesType& getCoordinates() { return this->coordinates; };
+
+      __cuda_callable__ inline
+      void setCoordinates( const CoordinatesType& coordinates ) { this->coordinates = coordinates; };
+
+      /***
+       * Call this method every time the coordinates are changed
+       * to recompute the mesh entity index (and the neighbor storage,
+       * when it is compiled in). This mechanism exists for performance.
+       */
+      __cuda_callable__ inline
+      void refresh()
+      {
+         this->entityIndex = this->grid.getEntityIndex( *this );
+#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE
+         this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
+#endif
+      };
+
+      __cuda_callable__ inline
+      IndexType getIndex() const { return this->entityIndex; };
+
+      /*__cuda_callable__ inline
+      const EntityOrientationType getOrientation() const;
+ 
+      __cuda_callable__ inline
+      void setOrientation( const EntityOrientationType& orientation ){};
+ 
+      __cuda_callable__ inline
+      const EntityBasisType getBasis() const;
+ 
+      __cuda_callable__ inline
+      void setBasis( const EntityBasisType& basis ){};
+ 
+      template< int NeighborEntityDimension = Dimension >
+      __cuda_callable__ inline
+      const NeighborEntities< NeighborEntityDimension >&
+      getNeighborEntities() const;
+      */
+      __cuda_callable__ inline
+      bool isBoundaryEntity() const
+      {
+         return false;
+         /*return( this->getCoordinates().x() == 0 ||
+                 this->getCoordinates().y() == 0 ||
+                 this->getCoordinates().x() == this->getMesh().getDimensions().x() - 1 ||
+                 this->getCoordinates().y() == this->getMesh().getDimensions().y() - 1 );*/
+      };
+
+      __cuda_callable__ inline
+      PointType getCenter() const
+      {
+         return PointType(
+            grid.getOrigin().x() + ( coordinates.x() + 0.5 ) * grid.getSpaceSteps().x(),
+            grid.getOrigin().y() + ( coordinates.y() + 0.5 ) * grid.getSpaceSteps().y() );
+      };
+
+      /*__cuda_callable__ inline
+      const RealType& getMeasure() const;
+ 
+      __cuda_callable__ inline
+      const PointType& getEntityProportions() const;*/
+
+      __cuda_callable__ inline
+      const GridType& getMesh() const { return this->grid; };
+
+   protected:
+
+      const GridType& grid;
+
+      IndexType entityIndex;
+
+      CoordinatesType coordinates;
+
+#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE
+      NeighborGridEntitiesStorageType neighborEntitiesStorage;
+#endif
+
+      // TODO: Test of boundary entity will likely be more
+      // complicated with MPI. It might be more efficient to resolve it
+      // before.
+      //bool isBoundaryEntity;
+};
diff --git a/src/Benchmarks/HeatEquation/Tuning/Traverser_Grid2D.h b/src/Benchmarks/HeatEquation/Tuning/Traverser_Grid2D.h
new file mode 100644
index 0000000000000000000000000000000000000000..8a5fcbb29a2915cbf45bf39ea9be052e58d7052f
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/Traverser_Grid2D.h
@@ -0,0 +1,74 @@
+/***************************************************************************
+                          Traverser_Grid2D.h  -  description
+                             -------------------
+    begin                : Jul 29, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+//#include <TNL/Meshes/Traverser.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+namespace TNL {
+// Primary Traverser template: declares the boundary / interior / all-entities traversal interface for a mesh.
+template< typename Mesh,
+          typename MeshEntity,
+          int EntitiesDimension = MeshEntity::getEntityDimension() >
+class Traverser
+{
+   public:
+      using MeshType = Mesh;
+      using MeshPointer = Pointers::SharedPointer<  MeshType >;
+      using DeviceType = typename MeshType::DeviceType;
+      // Each method takes the mesh pointer and the user data; EntitiesProcessor is supplied as a template argument.
+      template< typename UserData,
+                typename EntitiesProcessor >
+      void processBoundaryEntities( const MeshPointer& meshPointer,
+                                    UserData& userData ) const;
+
+      template< typename UserData,
+                typename EntitiesProcessor >
+      void processInteriorEntities( const MeshPointer& meshPointer,
+                                    UserData& userData ) const;
+
+      template< typename UserData,
+                typename EntitiesProcessor >
+      void processAllEntities( const MeshPointer& meshPointer,
+                               UserData& userData ) const;
+}; 
+// Specialization for entities of dimension 2 on a 2D grid; the methods are defined in Traverser_Grid2D_impl.h.
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename GridEntity >
+class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >
+{
+   public:
+      typedef Meshes::Grid< 2, Real, Device, Index > GridType;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      // Boundary pass over the range (0,0) .. dimensions - (1,1)
+      template< typename UserData,
+                typename EntitiesProcessor >
+      void processBoundaryEntities( const GridPointer& gridPointer,
+                                    UserData& userData ) const;
+      // Full pass over the range (1,1) .. dimensions - (2,2)
+      template< typename UserData,
+                typename EntitiesProcessor >
+      void processInteriorEntities( const GridPointer& gridPointer,
+                                    UserData& userData ) const;
+      template< typename UserData,
+                typename EntitiesProcessor >
+      void processAllEntities( const GridPointer& gridPointer,
+                               UserData& userData ) const;
+ 
+};
+
+
+} // namespace TNL
+
+#include "Traverser_Grid2D_impl.h"
diff --git a/src/Benchmarks/HeatEquation/Tuning/Traverser_Grid2D_impl.h b/src/Benchmarks/HeatEquation/Tuning/Traverser_Grid2D_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..16fc3a83d980763891adf1a5064054eb215085b0
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/Traverser_Grid2D_impl.h
@@ -0,0 +1,92 @@
+/***************************************************************************
+                          Traverser_Grid2D_impl.h  -  description
+                             -------------------
+    begin                : Jul 29, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include "GridTraverser.h"
+
+namespace TNL {
+
+/****
+ * Grid 2D, cells: boundary traversal
+ */
+template< typename Real, typename Device, typename Index, typename GridEntity >
+   template< typename UserData, typename EntitiesProcessor >
+void
+Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
+processBoundaryEntities( const GridPointer& gridPointer, UserData& userData ) const
+{
+   // This specialization handles entities of dimension 2 only.
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
+
+   // The traversed rectangle is the whole grid: (0,0) .. dimensions - (1,1).
+   const CoordinatesType firstCell( 0, 0 );
+   const CoordinatesType lastCell = gridPointer->getDimensions() - CoordinatesType( 1, 1 );
+
+   // Template arguments: boundary-only mode (true) with both the
+   // X- and Y-orthogonal edges enabled (1, 1). The trailing 0 is the stream index.
+   using CellTraverser = GridTraverser< GridType, GridEntity >;
+   CellTraverser::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 1 >(
+      gridPointer,
+      firstCell,
+      lastCell,
+      userData,
+      0 );
+}
+
+template< typename Real, typename Device, typename Index, typename GridEntity >
+   template< typename UserData, typename EntitiesProcessor >
+void
+Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
+processInteriorEntities( const GridPointer& gridPointer, UserData& userData ) const
+{
+   // Interior traversal; this specialization handles entities of dimension 2 only.
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
+
+   // Shrink the full index range by one on each side: (1,1) .. dimensions - (2,2).
+   const CoordinatesType firstCell( 1, 1 );
+   const CoordinatesType lastCell = gridPointer->getDimensions() - CoordinatesType( 2, 2 );
+
+   // Template argument false: every entity of the range is processed.
+   // The trailing 0 is the stream index.
+   using CellTraverser = GridTraverser< GridType, GridEntity >;
+   CellTraverser::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
+      gridPointer,
+      firstCell,
+      lastCell,
+      userData,
+      0 );
+}
+
+template< typename Real, typename Device, typename Index, typename GridEntity >
+   template< typename UserData, typename EntitiesProcessor >
+void
+Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
+processAllEntities( const GridPointer& gridPointer, UserData& userData ) const
+{
+   // Traversal of all entities; this specialization handles entities of dimension 2 only.
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
+
+   // The traversed rectangle is the whole grid: (0,0) .. dimensions - (1,1).
+   const CoordinatesType firstCell( 0, 0 );
+   const CoordinatesType lastCell = gridPointer->getDimensions() - CoordinatesType( 1, 1 );
+
+   // Template argument false: every entity of the range is processed.
+   // The trailing 0 is the stream index.
+   using CellTraverser = GridTraverser< GridType, GridEntity >;
+   CellTraverser::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
+      gridPointer,
+      firstCell,
+      lastCell,
+      userData,
+      0 );
+}
+
+} // namespace TNL
diff --git a/src/Benchmarks/HeatEquation/Tuning/tunning.h b/src/Benchmarks/HeatEquation/Tuning/tunning.h
new file mode 100644
index 0000000000000000000000000000000000000000..07a5d7f62bdd7e08fb66a7df0f1e9c8a869cb17b
--- /dev/null
+++ b/src/Benchmarks/HeatEquation/Tuning/tunning.h
@@ -0,0 +1,146 @@
+/***************************************************************************
+                          tunning.h  -  description
+                             -------------------
+    begin                : Aug 24, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+
+#pragma once
+
+#ifdef HAVE_CUDA
+#include<cuda.h>
+#endif
+
+#include "SimpleCell.h"
+
+/****
+ * Test-only payload (two scalars, four 2D static vectors and a 2D grid) used for
+ * measuring performance with different ways of passing data to kernels.
+ */
+struct Data
+{
+   double time, tau;
+   TNL::Containers::StaticVector< 2, double > c1, c2, c3, c4;
+   TNL::Meshes::Grid< 2, double > grid;
+};
+
+
+#ifdef HAVE_CUDA
+
+#define WITH_CELL  // Serves for comparison of performance when using SimpleCell
+                   // vs. using only cell index and coordinates
+// Boundary kernel: one thread per (x, y) of the launch grid; threads lying on a grid edge call the processor.
+template< typename BoundaryEntitiesProcessor, typename UserData, typename Grid, typename Real, typename Index >
+__global__ void _boundaryConditionsKernel( const Grid* grid,
+                                           UserData userData )
+{
+   //Real* u = userData.u;
+   //const typename UserData::BoundaryConditionsType* bc = userData.boundaryConditions;
+   using Coordinates = typename Grid::CoordinatesType;
+   const Index& gridXSize = grid->getDimensions().x();
+   const Index& gridYSize = grid->getDimensions().y();
+#ifdef WITH_CELL   
+   SimpleCell< Grid > cell( *grid,
+      Coordinates( ( blockIdx.x ) * blockDim.x + threadIdx.x,
+                   ( blockIdx.y ) * blockDim.y + threadIdx.y ) );
+   Coordinates& coordinates = cell.getCoordinates();
+   cell.refresh();   
+#else   
+   Coordinates coordinates( ( blockIdx.x ) * blockDim.x + threadIdx.x,
+                             ( blockIdx.y ) * blockDim.y + threadIdx.y );
+   Index entityIndex = coordinates.y() * gridXSize + coordinates.x();
+#endif   
+   // NOTE(review): the four tests below are independent ifs, so a corner cell matches two of them and is
+   // processed twice; with WITH_CELL, cell.refresh() above also runs for threads outside the grid.
+   if( coordinates.x() == 0 && coordinates.y() < gridYSize )
+   {
+      //u[ c ] = ( *bc )( *grid, u, c, coordinates, 0 );
+#ifdef WITH_CELL
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, cell );
+#else
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, entityIndex, coordinates );
+#endif 
+   }
+   if( coordinates.x() == gridXSize - 1 && coordinates.y() < gridYSize )
+   {
+      //u[ c ] = ( *bc )( *grid, u, c, coordinates, 0 );
+
+#ifdef WITH_CELL
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, cell );
+#else
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, entityIndex, coordinates );
+#endif      
+   }
+   if( coordinates.y() == 0 && coordinates.x() < gridXSize )
+   {
+      //u[ c ] = ( *bc )( *grid, u, c, coordinates, 0 );
+#ifdef WITH_CELL
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, cell );
+#else
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, entityIndex, coordinates );
+#endif      
+   }
+   if( coordinates.y() == gridYSize -1  && coordinates.x() < gridXSize )
+   {
+      //u[ c ] = ( *bc )( *grid, u, c, coordinates, 0 );
+
+#ifdef WITH_CELL
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, cell );
+#else
+      BoundaryEntitiesProcessor::processEntity( *grid, userData, entityIndex, coordinates );
+#endif      
+   }         
+}
+// Interior kernel: one thread per cell; only cells strictly inside the grid are handed to the processor.
+template< typename InteriorEntitiesProcessor, typename UserData, typename Grid, typename Real, typename Index >
+__global__ void _heatEquationKernel( const Grid* grid,
+                                     UserData userData )
+{
+   /*Real* u = userData.u;
+   Real* fu = userData.fu;
+   const typename UserData::DifferentialOperatorType* op = userData.differentialOperator;*/
+
+   const Index& gridXSize = grid->getDimensions().x();
+   const Index& gridYSize = grid->getDimensions().y();
+   const Real& hx_inv = grid->template getSpaceStepsProducts< -2,  0 >();
+   const Real& hy_inv = grid->template getSpaceStepsProducts<  0, -2 >();
+
+   SimpleCell< Grid > cell( *grid );
+   cell.getCoordinates().x() = blockIdx.x * blockDim.x + threadIdx.x;
+   cell.getCoordinates().y() = blockIdx.y * blockDim.y + threadIdx.y;
+
+   /*using Coordinates = typename Grid::CoordinatesType;
+   Coordinates coordinates( blockIdx.x * blockDim.x + threadIdx.x, 
+                            blockIdx.y * blockDim.y + threadIdx.y );*/
+
+   if( cell.getCoordinates().x() > 0 && cell.getCoordinates().x() < gridXSize - 1 &&
+       cell.getCoordinates().y() > 0 && cell.getCoordinates().y() < gridYSize - 1 )
+   //if( coordinates.x() > 0 && coordinates.x() < gridXSize - 1 &&
+   //    coordinates.y() > 0 && coordinates.y() < gridYSize - 1 )
+   {
+#ifdef WITH_CELL
+      cell.refresh();
+      InteriorEntitiesProcessor::processEntity( *grid, userData, cell );
+#else
+      // The standalone `coordinates` variable is commented out above, so the index is taken from the cell.
+      const Index entityIndex = cell.getCoordinates().y() * gridXSize + cell.getCoordinates().x();
+      InteriorEntitiesProcessor::processEntity( *grid, userData, entityIndex, cell.getCoordinates() );
+#endif
+
+
+      //fu[ entityIndex ] = ( *op )( *grid, userData.u, entityIndex, coordinates, userData.time ); // + 0.1;
+
+      //fu[ entityIndex ] = ( ( u[ entityIndex - 1 ]         - 2.0 * u[ entityIndex ] + u[ entityIndex + 1 ]         ) * hx_inv +
+      //                    ( u[ entityIndex - gridXSize ] - 2.0 * u[ entityIndex ] + u[ entityIndex + gridXSize ] ) * hy_inv );
+
+
+   }
+}
+
+
+#endif
+
diff --git a/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h b/src/Benchmarks/HeatEquation/pure-c-rhs.h
similarity index 99%
rename from tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h
rename to src/Benchmarks/HeatEquation/pure-c-rhs.h
index 4a23b65582717799444419fb185fa23e5b63d453..fc1fe779fadf2dd4c370112cbe3883772d5e3b84 100644
--- a/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h
+++ b/src/Benchmarks/HeatEquation/pure-c-rhs.h
@@ -82,6 +82,7 @@ __global__ void heatEquationKernel( const Real* u,
       const Index c = j * gridXSize + i;
       aux[ c ] = ( ( u[ c - 1 ]         - 2.0 * u[ c ] + u[ c + 1 ]         ) * hx_inv +
                    ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv );
+      //aux[ c ] += 0.1;
       //aux[ c ] = ( ( __ldg( &u[ c - 1 ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + 1 ] ) ) * hx_inv +
       //                   ( __ldg( &u[ c - gridXSize ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + gridXSize ] ) ) * hy_inv );
    }  
diff --git a/tests/benchmarks/heat-equation-benchmark/run-HeatEquationBenchmark b/src/Benchmarks/HeatEquation/run-HeatEquationBenchmark
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/run-HeatEquationBenchmark
rename to src/Benchmarks/HeatEquation/run-HeatEquationBenchmark
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.cpp b/src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.cpp
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.cpp
rename to src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.cpp
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.cu b/src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.cu
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.cu
rename to src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.cu
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.h b/src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.h
similarity index 88%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.h
rename to src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.h
index 758003470a0167d52ccfe1b14c3654682a6240dd..a617dcc6bf1f1ba9085aec8d81e3a1a90d20f75e 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-heat-equation.h
+++ b/src/Benchmarks/HeatEquation/tnl-benchmark-heat-equation.h
@@ -6,6 +6,7 @@
 #include <TNL/Functions/Analytic/Constant.h>
 #include "HeatEquationBenchmarkProblem.h"
 #include "BenchmarkLaplace.h"
+#include "DirichletBoundaryConditions.h"
 #include "HeatEquationBenchmarkRhs.h"
 #include "HeatEquationBenchmarkBuildConfigTag.h"
 
@@ -35,6 +36,7 @@ template< typename ConfigTag >class HeatEquationBenchmarkConfig
             config.addEntryEnum< String >( "pure-c" );
             config.addEntryEnum< String >( "templated" );
             config.addEntryEnum< String >( "templated-compact" );
+            config.addEntryEnum< String >( "tunning" );
 
          /****
           * Add definition of your solver command line arguments.
@@ -77,27 +79,27 @@ class HeatEquationBenchmarkSetter
              if( boundaryConditionsType == "dirichlet" )
              {
                 typedef Operators::DirichletBoundaryConditions< MeshType, Constant, MeshType::getMeshDimension(), Real, Index > BoundaryConditions;
-                typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, CommunicatorType, ApproximateOperator > Problem;
+                typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator, CommunicatorType > Problem;
                 SolverStarter solverStarter;
                 return solverStarter.template run< Problem >( parameters );
              }
-             typedef Operators::NeumannBoundaryConditions< MeshType, Constant, Real, Index > BoundaryConditions;
-             typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, CommunicatorType, ApproximateOperator > Problem;
+             /*typedef Operators::NeumannBoundaryConditions< MeshType, Constant, Real, Index > BoundaryConditions;
+             typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator, CommunicatorType > Problem;
              SolverStarter solverStarter;
-             return solverStarter.template run< Problem >( parameters );
+             return solverStarter.template run< Problem >( parameters );*/
           }
-          typedef Functions::MeshFunction< MeshType > MeshFunction;
+          /*typedef Functions::MeshFunction< MeshType > MeshFunction;
           if( boundaryConditionsType == "dirichlet" )
           {
              typedef Operators::DirichletBoundaryConditions< MeshType, MeshFunction, MeshType::getMeshDimension(), Real, Index > BoundaryConditions;
-             typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, CommunicatorType, ApproximateOperator > Problem;
+             typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator, CommunicatorType > Problem;
              SolverStarter solverStarter;
              return solverStarter.template run< Problem >( parameters );
           }
           typedef Operators::NeumannBoundaryConditions< MeshType, MeshFunction, Real, Index > BoundaryConditions;
-          typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, CommunicatorType, ApproximateOperator > Problem;
+          typedef HeatEquationBenchmarkProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator, CommunicatorType > Problem;
           SolverStarter solverStarter;
-          return solverStarter.template run< Problem >( parameters );
+          return solverStarter.template run< Problem >( parameters );*/
       }
 
 };
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation-bug.cu b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.cu
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation-bug.cu
rename to src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.cu
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation-bug.h b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation-bug.h
rename to src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.h
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.cpp b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.cpp
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.cpp
rename to src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.cpp
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.cu b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.cu
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.cu
rename to src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.cu
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h
similarity index 99%
rename from tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h
rename to src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h
index f3acb774600eca22a8938084b24d53f34308edea..e8798609898c03831a165b9173dccadffdd4a1ba 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h
+++ b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h
@@ -313,7 +313,7 @@ bool solveHeatEquationCuda( const Config::ParameterContainer& parameters,
    
    typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
    typedef typename GridType::PointType PointType;
-   typedef SharedPointer< GridType > GridPointer;
+   typedef Pointers::SharedPointer<  GridType > GridPointer;
    GridPointer gridPointer;
    gridPointer->setDimensions( gridXSize, gridYSize );
    gridPointer->setDomain( PointType( 0.0, 0.0 ), PointType( domainXSize, domainYSize ) );
@@ -541,7 +541,7 @@ bool solveHeatEquationHost( const Config::ParameterContainer& parameters,
     */
    typedef Meshes::Grid< 2, Real, Devices::Host, Index > GridType;
    typedef typename GridType::PointType PointType;
-   SharedPointer< GridType > gridPointer;
+   Pointers::SharedPointer<  GridType > gridPointer;
    gridPointer->setDimensions( gridXSize, gridYSize );
    gridPointer->setDomain( PointType( 0.0, 0.0 ), PointType( domainXSize, domainYSize ) );
    Containers::Vector< Real, Devices::Host, Index > vecU;
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestGrid2D.h b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnlTestGrid2D.h
rename to src/Benchmarks/HeatEquation/tnlTestGrid2D.h
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h
rename to src/Benchmarks/HeatEquation/tnlTestGridEntity.h
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntitiesStorage.h b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntitiesStorage.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntitiesStorage.h
rename to src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntitiesStorage.h
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter.h b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter.h
rename to src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter.h
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h
similarity index 100%
rename from tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h
rename to src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h
diff --git a/src/Benchmarks/LinearSolvers/CMakeLists.txt b/src/Benchmarks/LinearSolvers/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1a95c92f791c7d7a0176415fd4968d8aa6bb8982
--- /dev/null
+++ b/src/Benchmarks/LinearSolvers/CMakeLists.txt
@@ -0,0 +1,9 @@
+# tnl-benchmark-linear-solvers: built from the .cu source when BUILD_CUDA is set, from the .cpp source otherwise.
+if( NOT BUILD_CUDA )
+    add_executable( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
+else()
+    cuda_add_executable( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu )
+endif()
+target_link_libraries( tnl-benchmark-linear-solvers tnl )
+
+install( TARGETS tnl-benchmark-linear-solvers RUNTIME DESTINATION bin )
diff --git a/tests/benchmarks/tnl-benchmark-linear-solvers.cpp b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-linear-solvers.cpp
rename to src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cpp
diff --git a/tests/benchmarks/tnl-benchmark-linear-solvers.cu b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cu
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-linear-solvers.cu
rename to src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.cu
diff --git a/tests/benchmarks/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
similarity index 91%
rename from tests/benchmarks/tnl-benchmark-linear-solvers.h
rename to src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 3638d0ab4a4aa4ad947a19ac385414da231f13a9..3e50f973e5c4b36cd3721fa53ea224d7dfa49f66 100644
--- a/tests/benchmarks/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -18,7 +18,7 @@
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Timer.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Matrices/Tridiagonal.h>
 #include <TNL/Matrices/Multidiagonal.h>
@@ -31,10 +31,8 @@
 #include <TNL/Solvers/Linear/CG.h>
 #include <TNL/Solvers/Linear/BICGStab.h>
 #include <TNL/Solvers/Linear/TFQMR.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 #include <TNL/Solvers/IterativeSolverMonitor.h>
 
-using namespace std;
 using namespace TNL;
 using namespace TNL::Matrices;
 
@@ -67,22 +65,21 @@ void configSetup( Config::ConfigDescription& config )
 
 template< typename Solver >
 bool benchmarkSolver( const Config::ParameterContainer& parameters,
-                      SharedPointer< typename Solver::MatrixType >& matrix)
+                      Pointers::SharedPointer<  typename Solver::MatrixType >& matrix)
 {
    typedef typename Solver::MatrixType MatrixType;
    typedef typename MatrixType::RealType RealType;
    typedef typename MatrixType::DeviceType DeviceType;
    typedef typename MatrixType::IndexType IndexType;
    typedef Containers::Vector< RealType, DeviceType, IndexType > VectorType;
-   typedef SharedPointer< VectorType > VectorPointer;
-   typedef SharedPointer< MatrixType > MatrixPointer;
+   typedef Pointers::SharedPointer<  MatrixType > MatrixPointer;
 
-   VectorPointer x, y, b;
-   x->setSize( matrix->getColumns() );
-   x->setValue( 1.0 / ( RealType ) matrix->getColumns() );
-   y->setSize( matrix->getColumns() );
-   b->setSize( matrix->getRows() );
-   matrix->vectorProduct( *x, *b );
+   VectorType x, y, b;
+   x.setSize( matrix->getColumns() );
+   x.setValue( 1.0 / ( RealType ) matrix->getColumns() );
+   y.setSize( matrix->getColumns() );
+   b.setSize( matrix->getRows() );
+   matrix->vectorProduct( x, b );
 
    Solver solver;
    Solvers::IterativeSolverMonitor< RealType, IndexType > monitor;
@@ -90,8 +87,8 @@ bool benchmarkSolver( const Config::ParameterContainer& parameters,
    solver.setSolverMonitor( monitor );
    solver.setMatrix( matrix );
    solver.setConvergenceResidue( 1.0e-6 );
-   solver.template solve< VectorType, Solvers::Linear::LinearResidueGetter< MatrixType, VectorType > >( *b, *y );
-  std::cout <<std::endl;
+   solver.solve( b, y );
+   std::cout << std::endl;
    return true;
 }
 
@@ -136,7 +133,7 @@ template< typename Matrix >
 bool resolveLinearSolver( const Config::ParameterContainer& parameters )
 {
    const String& solver = parameters.getParameter< String >( "solver" );
-   typedef SharedPointer< Matrix > MatrixPointer;
+   typedef Pointers::SharedPointer<  Matrix > MatrixPointer;
 
    MatrixPointer matrix;
    if( ! readMatrix( parameters, *matrix ) )
diff --git a/src/Benchmarks/SpMV/CMakeLists.txt b/src/Benchmarks/SpMV/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a73e6738cdb8bb4effd889960f3ffcd5b7255b90
--- /dev/null
+++ b/src/Benchmarks/SpMV/CMakeLists.txt
@@ -0,0 +1,9 @@
+# tnl-benchmark-spmv: .cu source linked with cuSPARSE when BUILD_CUDA is set, .cpp source otherwise.
+if( NOT BUILD_CUDA )
+    add_executable( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
+    target_link_libraries( tnl-benchmark-spmv tnl )
+else()
+    cuda_add_executable( tnl-benchmark-spmv tnl-benchmark-spmv.cu )
+    target_link_libraries( tnl-benchmark-spmv tnl ${CUDA_cusparse_LIBRARY} )
+endif()
+install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )
diff --git a/tests/benchmarks/tnl-benchmark-spmv.cpp b/src/Benchmarks/SpMV/tnl-benchmark-spmv.cpp
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-spmv.cpp
rename to src/Benchmarks/SpMV/tnl-benchmark-spmv.cpp
diff --git a/tests/benchmarks/tnl-benchmark-spmv.cu b/src/Benchmarks/SpMV/tnl-benchmark-spmv.cu
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-spmv.cu
rename to src/Benchmarks/SpMV/tnl-benchmark-spmv.cu
diff --git a/tests/benchmarks/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
similarity index 100%
rename from tests/benchmarks/tnl-benchmark-spmv.h
rename to src/Benchmarks/SpMV/tnl-benchmark-spmv.h
diff --git a/tests/benchmarks/tnlCusparseCSRMatrix.h b/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h
similarity index 100%
rename from tests/benchmarks/tnlCusparseCSRMatrix.h
rename to src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h
diff --git a/tests/benchmarks/share/CMakeLists.txt b/src/Benchmarks/scripts/CMakeLists.txt
similarity index 100%
rename from tests/benchmarks/share/CMakeLists.txt
rename to src/Benchmarks/scripts/CMakeLists.txt
diff --git a/tests/benchmarks/share/convert-matrices b/src/Benchmarks/scripts/convert-matrices
similarity index 100%
rename from tests/benchmarks/share/convert-matrices
rename to src/Benchmarks/scripts/convert-matrices
diff --git a/tests/benchmarks/share/cuda-profiler.conf b/src/Benchmarks/scripts/cuda-profiler.conf
similarity index 100%
rename from tests/benchmarks/share/cuda-profiler.conf
rename to src/Benchmarks/scripts/cuda-profiler.conf
diff --git a/tests/benchmarks/share/draw-matrices b/src/Benchmarks/scripts/draw-matrices
similarity index 100%
rename from tests/benchmarks/share/draw-matrices
rename to src/Benchmarks/scripts/draw-matrices
diff --git a/tests/benchmarks/share/florida-matrix-market b/src/Benchmarks/scripts/florida-matrix-market
similarity index 100%
rename from tests/benchmarks/share/florida-matrix-market
rename to src/Benchmarks/scripts/florida-matrix-market
diff --git a/tests/benchmarks/share/get-matrices b/src/Benchmarks/scripts/get-matrices
similarity index 100%
rename from tests/benchmarks/share/get-matrices
rename to src/Benchmarks/scripts/get-matrices
diff --git a/tests/benchmarks/share/matrix-market b/src/Benchmarks/scripts/matrix-market
similarity index 100%
rename from tests/benchmarks/share/matrix-market
rename to src/Benchmarks/scripts/matrix-market
diff --git a/tests/benchmarks/share/process-cuda-profile.pl b/src/Benchmarks/scripts/process-cuda-profile.pl
similarity index 100%
rename from tests/benchmarks/share/process-cuda-profile.pl
rename to src/Benchmarks/scripts/process-cuda-profile.pl
diff --git a/tests/benchmarks/share/run-matrix-solvers-benchmark b/src/Benchmarks/scripts/run-matrix-solvers-benchmark
similarity index 100%
rename from tests/benchmarks/share/run-matrix-solvers-benchmark
rename to src/Benchmarks/scripts/run-matrix-solvers-benchmark
diff --git a/tests/benchmarks/share/run-tnl-benchmark-linear-solvers b/src/Benchmarks/scripts/run-tnl-benchmark-linear-solvers
similarity index 100%
rename from tests/benchmarks/share/run-tnl-benchmark-linear-solvers
rename to src/Benchmarks/scripts/run-tnl-benchmark-linear-solvers
diff --git a/tests/benchmarks/share/run-tnl-benchmark-spmv b/src/Benchmarks/scripts/run-tnl-benchmark-spmv
similarity index 100%
rename from tests/benchmarks/share/run-tnl-benchmark-spmv
rename to src/Benchmarks/scripts/run-tnl-benchmark-spmv
diff --git a/src/Benchmarks/scripts/tnl-run-heat-equation-benchmark b/src/Benchmarks/scripts/tnl-run-heat-equation-benchmark
new file mode 100644
index 0000000000000000000000000000000000000000..28342318fd900c4ac7aa36ee6ca2f5f3997d9066
--- /dev/null
+++ b/src/Benchmarks/scripts/tnl-run-heat-equation-benchmark
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+device="cuda"
+dofSize=256
+dimension=2;
+proportions=2
+finalTime=1.0e-4
+timeStep=1.0e-8
+
+analyticFunction="exp-bump"
+sigma=0.25
+
+tnl-grid-setup --dimensions ${dimension} \
+               --proportions-x ${proportions} \
+               --proportions-y ${proportions} \
+               --proportions-z ${proportions} \
+               --origin-x -1 \
+               --origin-y -1 \
+               --origin-z -1 \
+               --size-x ${dofSize} \
+               --size-y ${dofSize} \
+               --size-z ${dofSize} \
+               
+tnl-init --mesh mesh.tnl \
+         --test-function ${analyticFunction} \
+         --output-file initial.tnl \
+         --sigma ${sigma}
+
+#valgrind --tool=memcheck \
+#echo "==================================================================="
+#echo "================ Heat equation solver ============================="
+#echo "==================================================================="
+#tnl-heat-equation --device ${device} \
+#                 --time-discretisation explicit \
+#                  --boundary-conditions-type dirichlet \
+#                  --boundary-conditions-constant 0 \
+#                  --discrete-solver euler \
+#                  --snapshot-period ${finalTime} \
+#                  --final-time ${finalTime} \
+#                  --time-step ${timeStep} \
+#                  --max-iterations 100000000 \
+#                  --refresh-rate 1000 \
+#                  --openmp-enabled false \
+#                  --verbose 0
+#
+#cat log.txt                  
+
+#echo "==================================================================="
+#echo "=================== Pure-C benchmark =============================="
+#echo "==================================================================="
+#
+#tnl-benchmark-heat-equation --device ${device} \
+#                 --time-discretisation explicit \
+#                  --boundary-conditions-type dirichlet \
+#                  --boundary-conditions-constant 0 \
+#                  --discrete-solver euler \
+#                  --snapshot-period ${finalTime} \
+#                  --final-time ${finalTime} \
+#                  --time-step ${timeStep} \
+#                  --max-iterations 100000000 \
+#                  --refresh-rate 1000 \
+#                  --openmp-enabled false \
+#                  --verbose 0 \
+#                  --cuda-kernel-type pure-c
+#
+#cat log.txt                  
+
+#echo "==================================================================="
+#echo "=============== Templated compact benchmark ======================="
+#echo "==================================================================="
+#
+#tnl-benchmark-heat-equation --device ${device} \
+#                 --time-discretisation explicit \
+#                  --boundary-conditions-type dirichlet \
+#                  --boundary-conditions-constant 0 \
+#                  --discrete-solver euler \
+#                  --snapshot-period ${finalTime} \
+#                  --final-time ${finalTime} \
+#                  --time-step ${timeStep} \
+#                  --max-iterations 100000000 \
+#                  --refresh-rate 1000 \
+#                  --openmp-enabled false \
+#                  --verbose 0 \
+#                  --cuda-kernel-type templated-compact
+#
+#cat log.txt                  
+
+# Run tnl-benchmark-heat-equation with --cuda-kernel-type templated, then print log.txt.
+printf '%s\n' \
+       "===================================================================" \
+       "=================== Templated benchmark ===========================" \
+       "==================================================================="
+tnl-benchmark-heat-equation --device ${device} \
+   --time-discretisation explicit \
+   --boundary-conditions-type dirichlet \
+   --boundary-conditions-constant 0 \
+   --discrete-solver euler \
+   --snapshot-period ${finalTime} \
+   --final-time ${finalTime} \
+   --time-step ${timeStep} \
+   --max-iterations 100000000 \
+   --refresh-rate 1000 \
+   --openmp-enabled false \
+   --verbose 0 \
+   --cuda-kernel-type templated
+cat log.txt
+
+# Run tnl-benchmark-simple-heat-equation, then tnl-diff its result file against u-00001.tnl.
+printf '%s\n' \
+       "===================================================================" \
+       "=============== Heat equation pure C benchmark ====================" \
+       "==================================================================="
+tnl-benchmark-simple-heat-equation --device ${device} \
+   --domain-x-size 2 \
+   --domain-y-size 2 \
+   --grid-x-size ${dofSize} \
+   --grid-y-size ${dofSize} \
+   --sigma ${sigma} \
+   --time-step ${timeStep} \
+   --final-time ${finalTime}
+tnl-diff --input-files simple-heat-equation-result.tnl u-00001.tnl
+cat tnl-diff.log
diff --git a/tests/benchmarks/share/tnl-run-spmv-benchmark b/src/Benchmarks/scripts/tnl-run-spmv-benchmark
similarity index 100%
rename from tests/benchmarks/share/tnl-run-spmv-benchmark
rename to src/Benchmarks/scripts/tnl-run-spmv-benchmark
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 540606d3782c71df92031ad96abd519783e70b63..ee275cafcc5f5f2a506fb1d2a79037f877963173 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,9 +1,16 @@
-if( WITH_PYTHON STREQUAL "yes" )
-   ADD_SUBDIRECTORY( Python )
-endif( WITH_PYTHON STREQUAL "yes" )
-
 ADD_SUBDIRECTORY( TNL )
 
+# Note that it is important to start building examples as soon as possible,
+# because they take the longest time and other stuff can be pipelined before
+# they are finished (at least with Ninja).
+if( ${WITH_EXAMPLES} )
+   add_subdirectory( Examples )
+endif()
+
+if( ${WITH_BENCHMARKS} )
+   ADD_SUBDIRECTORY( Benchmarks )
+endif()
+
 if( ${WITH_TOOLS} )
    ADD_SUBDIRECTORY( Tools )
 endif()
@@ -11,3 +18,7 @@ endif()
 if( ${WITH_TESTS} )
    ADD_SUBDIRECTORY( UnitTests )
 endif()
+
+if( ${WITH_PYTHON} )
+   ADD_SUBDIRECTORY( Python )
+endif()
diff --git a/examples/CMakeLists.txt b/src/Examples/CMakeLists.txt
similarity index 94%
rename from examples/CMakeLists.txt
rename to src/Examples/CMakeLists.txt
index c3e8869e49d81db90e46bc5288da5858e9e745c5..8fcc4a5de7f5ae768fb9709bddd6d875fd53c257 100644
--- a/examples/CMakeLists.txt
+++ b/src/Examples/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory( simple-examples )
 add_subdirectory( heat-equation )
 add_subdirectory( transport-equation )
 add_subdirectory( navier-stokes )
diff --git a/examples/advection/Riemann1DBoundaryConditions.h b/src/Examples/advection/Riemann1DBoundaryConditions.h
similarity index 98%
rename from examples/advection/Riemann1DBoundaryConditions.h
rename to src/Examples/advection/Riemann1DBoundaryConditions.h
index 75b8966269088094153cbfb6b214a4eede178fc5..69ea8ebe8cf0009132d824c2b431de24150b7f24 100644
--- a/examples/advection/Riemann1DBoundaryConditions.h
+++ b/src/Examples/advection/Riemann1DBoundaryConditions.h
@@ -50,7 +50,7 @@ class Riemann1DBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/advection/Riemann2DBoundaryConditions.h b/src/Examples/advection/Riemann2DBoundaryConditions.h
similarity index 98%
rename from examples/advection/Riemann2DBoundaryConditions.h
rename to src/Examples/advection/Riemann2DBoundaryConditions.h
index f29cb5a86c77d126ace2b4d93839a918d5665865..33cafb77376ee5ddd0188f6ab7bd3fcb6ffaf6e4 100644
--- a/examples/advection/Riemann2DBoundaryConditions.h
+++ b/src/Examples/advection/Riemann2DBoundaryConditions.h
@@ -50,7 +50,7 @@ class Riemann2DBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/advection/tnl-run-advection b/src/Examples/advection/tnl-run-advection
similarity index 100%
rename from examples/advection/tnl-run-advection
rename to src/Examples/advection/tnl-run-advection
diff --git a/examples/flow-vl/BoundaryConditionsBoiler.h b/src/Examples/flow-sw/BoundaryConditionsBoiler.h
similarity index 87%
rename from examples/flow-vl/BoundaryConditionsBoiler.h
rename to src/Examples/flow-sw/BoundaryConditionsBoiler.h
index 0cba68d7fa1a8689df50b0ea9016b511ad126918..f61f988da66f3f7476827c8f8853f4d6dbf67217 100644
--- a/examples/flow-vl/BoundaryConditionsBoiler.h
+++ b/src/Examples/flow-sw/BoundaryConditionsBoiler.h
@@ -29,14 +29,14 @@ class BoundaryConditionsBoiler
       typedef TNL::Operators::EnergyBoundaryConditionsBoiler< MeshType, FunctionType, RealType, IndexType > EnergyBoundaryConditionsType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
 
-      typedef SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
-      typedef SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow/BoundaryConditionsCavity.h b/src/Examples/flow-sw/BoundaryConditionsCavity.h
similarity index 87%
rename from examples/flow/BoundaryConditionsCavity.h
rename to src/Examples/flow-sw/BoundaryConditionsCavity.h
index 8a42faea17fa4cbdfa40a0b27533bca567d79206..bbae2d3e93c65383db11a53a3b16d734fc3131e2 100644
--- a/examples/flow/BoundaryConditionsCavity.h
+++ b/src/Examples/flow-sw/BoundaryConditionsCavity.h
@@ -29,14 +29,14 @@ class BoundaryConditionsCavity
       typedef TNL::Operators::EnergyBoundaryConditionsCavity< MeshType, FunctionType, RealType, IndexType > EnergyBoundaryConditionsType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
 
-      typedef SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
-      typedef SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-sw/CMakeLists.txt b/src/Examples/flow-sw/CMakeLists.txt
similarity index 100%
rename from examples/flow-sw/CMakeLists.txt
rename to src/Examples/flow-sw/CMakeLists.txt
diff --git a/examples/inviscid-flow-sw/CompressibleConservativeVariables.h b/src/Examples/flow-sw/CompressibleConservativeVariables.h
similarity index 93%
rename from examples/inviscid-flow-sw/CompressibleConservativeVariables.h
rename to src/Examples/flow-sw/CompressibleConservativeVariables.h
index a3afc845366f8df17b41c5affc5a4e49d5da052a..01e820686e98781a3267c4526e8e7c6449218415 100644
--- a/examples/inviscid-flow-sw/CompressibleConservativeVariables.h
+++ b/src/Examples/flow-sw/CompressibleConservativeVariables.h
@@ -13,7 +13,7 @@
 
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -28,9 +28,9 @@ class CompressibleConservativeVariables
       typedef typename MeshType::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshType > MeshPointer;      
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > MomentumFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;      
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > MomentumFieldPointer;
       
       CompressibleConservativeVariables(){};
       
@@ -144,4 +144,4 @@ class CompressibleConservativeVariables
       
 };
 
-} // namespace TN
\ No newline at end of file
+} // namespace TNL
diff --git a/examples/flow-sw/DensityBoundaryConditionBoiler.h b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-sw/DensityBoundaryConditionBoiler.h
rename to src/Examples/flow-sw/DensityBoundaryConditionBoiler.h
index c3bae7e3d961ab4a6f6dddb287cc1a23184f1c87..6231f6780e24e3090c83832c9d47534a9c6104a8 100644
--- a/examples/flow-sw/DensityBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
    template< typename EntityType,
@@ -237,8 +237,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -387,8 +387,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/DensityBoundaryConditionCavity.h b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-sw/DensityBoundaryConditionCavity.h
rename to src/Examples/flow-sw/DensityBoundaryConditionCavity.h
index a2d34ce540b8f8806b0ae0bf7e31d357c01ed647..18eaff1101eccc7733eb5978b48807be82bba916 100644
--- a/examples/flow-sw/DensityBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
    template< typename EntityType,
@@ -237,8 +237,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -384,8 +384,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h
similarity index 98%
rename from examples/flow-vl/EnergyBoundaryConditionBoiler.h
rename to src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h
index fe227d68f81a0df6b980d429cfb23472c0f97bc2..a99fdf0157bfcbca614374e8472ab9fe8a3b4f58 100644
--- a/examples/flow-vl/EnergyBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h
@@ -116,8 +116,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -255,8 +255,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -535,8 +535,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/EnergyBoundaryConditionCavity.h b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-vl/EnergyBoundaryConditionCavity.h
rename to src/Examples/flow-sw/EnergyBoundaryConditionCavity.h
index ca3fbe01dedcfdb7bc0a40777ff93e264fcfdec0..3b49cd56e5ab6901716c86115561c26fbbbff973 100644
--- a/examples/flow-vl/EnergyBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h
@@ -116,8 +116,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -255,8 +255,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -479,8 +479,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/LaxFridrichs.h b/src/Examples/flow-sw/LaxFridrichs.h
similarity index 88%
rename from examples/flow-sw/LaxFridrichs.h
rename to src/Examples/flow-sw/LaxFridrichs.h
index cdf32899f69eb797a6d9a18a52b84c09709867bf..c56d20aed9947bd16796a5bf7ceb22adb237bf0c 100644
--- a/examples/flow-sw/LaxFridrichs.h
+++ b/src/Examples/flow-sw/LaxFridrichs.h
@@ -43,15 +43,15 @@ class LaxFridrichs
       typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/inviscid-flow-vl/LaxFridrichsContinuity.h b/src/Examples/flow-sw/LaxFridrichsContinuity.h
similarity index 99%
rename from examples/inviscid-flow-vl/LaxFridrichsContinuity.h
rename to src/Examples/flow-sw/LaxFridrichsContinuity.h
index 45ad4d52b12d402365a40cac043d5525e230cecb..82747cd18220efc01bc2d68e0247c01723c29fd0 100644
--- a/examples/inviscid-flow-vl/LaxFridrichsContinuity.h
+++ b/src/Examples/flow-sw/LaxFridrichsContinuity.h
@@ -34,7 +34,7 @@ class LaxFridrichsContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-vl/LaxFridrichsEnergy.h b/src/Examples/flow-sw/LaxFridrichsEnergy.h
similarity index 98%
rename from examples/flow-vl/LaxFridrichsEnergy.h
rename to src/Examples/flow-sw/LaxFridrichsEnergy.h
index 18c824762b8c677253dbd4e494be7ad3aea7e769..03019ed23c85f82ee489c95d8173c0f100cff3c8 100644
--- a/examples/flow-vl/LaxFridrichsEnergy.h
+++ b/src/Examples/flow-sw/LaxFridrichsEnergy.h
@@ -30,8 +30,8 @@ class LaxFridrichsEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-sw/LaxFridrichsMomentumBase.h b/src/Examples/flow-sw/LaxFridrichsMomentumBase.h
similarity index 92%
rename from examples/flow-sw/LaxFridrichsMomentumBase.h
rename to src/Examples/flow-sw/LaxFridrichsMomentumBase.h
index 67dae9fdf8256cecf032a731dd5d616d715ca0fe..cc2561748968a2bd808fa434d4f3d87d41765f45 100644
--- a/examples/flow-sw/LaxFridrichsMomentumBase.h
+++ b/src/Examples/flow-sw/LaxFridrichsMomentumBase.h
@@ -28,8 +28,8 @@ class LaxFridrichsMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsMomentumBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-sw/LaxFridrichsMomentumX.h b/src/Examples/flow-sw/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/flow-sw/LaxFridrichsMomentumX.h
rename to src/Examples/flow-sw/LaxFridrichsMomentumX.h
diff --git a/examples/flow-sw/LaxFridrichsMomentumY.h b/src/Examples/flow-sw/LaxFridrichsMomentumY.h
similarity index 100%
rename from examples/flow-sw/LaxFridrichsMomentumY.h
rename to src/Examples/flow-sw/LaxFridrichsMomentumY.h
diff --git a/examples/flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/flow-sw/LaxFridrichsMomentumZ.h
rename to src/Examples/flow-sw/LaxFridrichsMomentumZ.h
diff --git a/examples/flow-vl/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-vl/MomentumXBoundaryConditionBoiler.h
rename to src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h
index 823ec475a570074f2f147a5b899606f203b76354..dfe63e07623a6fdb6ca7ebb7da8ab445d9505372 100644
--- a/examples/flow-vl/MomentumXBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -421,8 +421,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-vl/MomentumXBoundaryConditionCavity.h
rename to src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h
index b78731382731f3aea95b64257c7330af15d78a99..07abfdbeb940039555ac2799d0ef374ca26faff0 100644
--- a/examples/flow-vl/MomentumXBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -410,8 +410,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-vl/MomentumYBoundaryConditionBoiler.h
rename to src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h
index 76f3ff05735d1e813ffef7105509986a14036de8..83b6282ddd50033f32a382f0b48f5abe7347ccaa 100644
--- a/examples/flow-vl/MomentumYBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -416,8 +416,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-vl/MomentumYBoundaryConditionCavity.h
rename to src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h
index afce8239b71ef68697a395177ae28c0be21dc788..a83dd653f92328814b8d0746bc45c8775552a310 100644
--- a/examples/flow-vl/MomentumYBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -407,8 +407,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow/MomentumZBoundaryConditionBoiler.h
rename to src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h
index 188aaa9851aaa93eeadd0de85d26554d562b66df..9d887857ce97f916dcfaccd0208138afc200afd1 100644
--- a/examples/flow/MomentumZBoundaryConditionBoiler.h
+++ b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -397,8 +397,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-vl/MomentumZBoundaryConditionCavity.h
rename to src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h
index 1942cd58935395f340ea99ab0be0b74f1aee0c69..5fe6f22e5945513c9e9e86d835256ef84e27c054 100644
--- a/examples/flow-vl/MomentumZBoundaryConditionCavity.h
+++ b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -397,8 +397,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/PhysicalVariablesGetter.h b/src/Examples/flow-sw/PhysicalVariablesGetter.h
similarity index 90%
rename from examples/flow/PhysicalVariablesGetter.h
rename to src/Examples/flow-sw/PhysicalVariablesGetter.h
index f1ba6bd1222b8653faeaac041606c101a071e188..2af0a02b820db183fa32669dcecfe63f7f006990 100644
--- a/examples/flow/PhysicalVariablesGetter.h
+++ b/src/Examples/flow-sw/PhysicalVariablesGetter.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
 #include <TNL/Functions/MeshFunctionEvaluator.h>
@@ -30,11 +30,11 @@ class PhysicalVariablesGetter
       static const int Dimensions = MeshType::getMeshDimension();
       
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
       {
@@ -99,7 +99,7 @@ class PhysicalVariablesGetter
          Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
          for( int i = 0; i < Dimensions; i++ )
          {
-            SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
+            Pointers::SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
                                                                         ( *conservativeVariables->getMomentum() )[ i ] );
             evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
          }
@@ -110,7 +110,7 @@ class PhysicalVariablesGetter
                         MeshFunctionPointer& pressure )
       {
          Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
-         SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
+         Pointers::SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
                                                                      conservativeVariables->getEnergy(),
                                                                      conservativeVariables->getMomentum(),
                                                                      gamma );
diff --git a/examples/flow-sw/RiemannProblemInitialCondition.h b/src/Examples/flow-sw/RiemannProblemInitialCondition.h
similarity index 99%
rename from examples/flow-sw/RiemannProblemInitialCondition.h
rename to src/Examples/flow-sw/RiemannProblemInitialCondition.h
index 640e4b6d1e59bca19fd09e0f2374bd88f5b82cd5..dfde32625973ef72161ebed80d633a089666049f 100644
--- a/examples/flow-sw/RiemannProblemInitialCondition.h
+++ b/src/Examples/flow-sw/RiemannProblemInitialCondition.h
@@ -38,7 +38,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 1,MeshReal, Device, Me
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -195,7 +195,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 2, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -415,7 +415,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 3, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -768,7 +768,7 @@ class RiemannProblemInitialCondition
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
       
       RiemannProblemInitialCondition()
@@ -1342,7 +1342,7 @@ class RiemannProblemInitialCondition
          typedef Functions::Analytic::VectorNorm< Dimensions, RealType > VectorNormType;
          typedef Operators::Analytic::Sign< Dimensions, RealType > SignType;
          typedef Functions::OperatorFunction< SignType, VectorNormType > InitialConditionType;
-         typedef SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
+         typedef Pointers::SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
          
          InitialConditionPointer initialCondition;
          initialCondition->getFunction().setCenter( center );
diff --git a/examples/flow-sw/Upwind.h b/src/Examples/flow-sw/Upwind.h
similarity index 89%
rename from examples/flow-sw/Upwind.h
rename to src/Examples/flow-sw/Upwind.h
index cf337144b1b1f2b7e163c56c6e632cb28f495f47..5aee91b62520fa9afd1c3427d330b6dab0e4ea98 100644
--- a/examples/flow-sw/Upwind.h
+++ b/src/Examples/flow-sw/Upwind.h
@@ -43,15 +43,15 @@ class Upwind
       typedef UpwindMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef UpwindEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-sw/UpwindContinuity.h b/src/Examples/flow-sw/UpwindContinuity.h
similarity index 98%
rename from examples/flow-sw/UpwindContinuity.h
rename to src/Examples/flow-sw/UpwindContinuity.h
index 9f019e23b1fd9dce1110efcd0e7b37dbc7c6ef76..fc599d3d9773c39752b72abcee6626150ea70c7a 100644
--- a/examples/flow-sw/UpwindContinuity.h
+++ b/src/Examples/flow-sw/UpwindContinuity.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -34,8 +34,8 @@ class UpwindContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       static String getType()
       {
diff --git a/examples/flow-sw/UpwindEnergy.h b/src/Examples/flow-sw/UpwindEnergy.h
similarity index 99%
rename from examples/flow-sw/UpwindEnergy.h
rename to src/Examples/flow-sw/UpwindEnergy.h
index 06cad50003c687f67113dc64d80878084aac4255..6c7e94ec87ce2e3fbd96e6affaeb91b1242d9246 100644
--- a/examples/flow-sw/UpwindEnergy.h
+++ b/src/Examples/flow-sw/UpwindEnergy.h
@@ -30,8 +30,8 @@ class UpwindEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-sw/UpwindMomentumBase.h b/src/Examples/flow-sw/UpwindMomentumBase.h
similarity index 97%
rename from examples/flow-sw/UpwindMomentumBase.h
rename to src/Examples/flow-sw/UpwindMomentumBase.h
index a00bd21703209cfe820b4739fa47b62093efbf1c..8761467afc0143dace983dc730d0d8f41c069d6e 100644
--- a/examples/flow-sw/UpwindMomentumBase.h
+++ b/src/Examples/flow-sw/UpwindMomentumBase.h
@@ -28,8 +28,8 @@ class UpwindMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
 
       void setTau(const Real& tau)
diff --git a/examples/flow-sw/UpwindMomentumX.h b/src/Examples/flow-sw/UpwindMomentumX.h
similarity index 100%
rename from examples/flow-sw/UpwindMomentumX.h
rename to src/Examples/flow-sw/UpwindMomentumX.h
diff --git a/examples/flow-sw/UpwindMomentumY.h b/src/Examples/flow-sw/UpwindMomentumY.h
similarity index 100%
rename from examples/flow-sw/UpwindMomentumY.h
rename to src/Examples/flow-sw/UpwindMomentumY.h
diff --git a/examples/flow-sw/UpwindMomentumZ.h b/src/Examples/flow-sw/UpwindMomentumZ.h
similarity index 100%
rename from examples/flow-sw/UpwindMomentumZ.h
rename to src/Examples/flow-sw/UpwindMomentumZ.h
diff --git a/examples/flow-sw/navierStokes.cpp b/src/Examples/flow-sw/navierStokes.cpp
similarity index 100%
rename from examples/flow-sw/navierStokes.cpp
rename to src/Examples/flow-sw/navierStokes.cpp
diff --git a/examples/flow-sw/navierStokes.cu b/src/Examples/flow-sw/navierStokes.cu
similarity index 100%
rename from examples/flow-sw/navierStokes.cu
rename to src/Examples/flow-sw/navierStokes.cu
diff --git a/examples/flow-sw/navierStokes.h b/src/Examples/flow-sw/navierStokes.h
similarity index 100%
rename from examples/flow-sw/navierStokes.h
rename to src/Examples/flow-sw/navierStokes.h
diff --git a/examples/flow-sw/navierStokesBuildConfigTag.h b/src/Examples/flow-sw/navierStokesBuildConfigTag.h
similarity index 100%
rename from examples/flow-sw/navierStokesBuildConfigTag.h
rename to src/Examples/flow-sw/navierStokesBuildConfigTag.h
diff --git a/examples/flow-vl/navierStokesProblem.h b/src/Examples/flow-sw/navierStokesProblem.h
similarity index 84%
rename from examples/flow-vl/navierStokesProblem.h
rename to src/Examples/flow-sw/navierStokesProblem.h
index 1d221661ec36c925f346792f58da5a2f9aaa8eff..faae6513c6aa4ba32c79f4516dc70b3e51b8ddb0 100644
--- a/examples/flow-vl/navierStokesProblem.h
+++ b/src/Examples/flow-sw/navierStokesProblem.h
@@ -42,20 +42,19 @@ class navierStokesProblem:
       using typename BaseType::MeshPointer;
       using typename BaseType::DofVectorType;
       using typename BaseType::DofVectorPointer;
-
       static const int Dimensions = Mesh::getMeshDimension();      
 
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      typedef SharedPointer< InviscidOperators > InviscidOperatorsPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< InviscidOperators > InviscidOperatorsPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
-
+      
       static String getTypeStatic();
 
       String getPrologHeader() const;
@@ -84,6 +83,9 @@ class navierStokesProblem:
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      void applyBoundaryConditions( const RealType& time,
+                                    DofVectorPointer& dofs ) { TNL_ASSERT( false, "TODO:Implement")};      
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/flow-sw/navierStokesProblem_impl.h b/src/Examples/flow-sw/navierStokesProblem_impl.h
similarity index 100%
rename from examples/flow-sw/navierStokesProblem_impl.h
rename to src/Examples/flow-sw/navierStokesProblem_impl.h
diff --git a/examples/flow-sw/navierStokesRhs.h b/src/Examples/flow-sw/navierStokesRhs.h
similarity index 100%
rename from examples/flow-sw/navierStokesRhs.h
rename to src/Examples/flow-sw/navierStokesRhs.h
diff --git a/examples/flow-sw/run-navier-stokes-sw b/src/Examples/flow-sw/run-navier-stokes-sw
similarity index 100%
rename from examples/flow-sw/run-navier-stokes-sw
rename to src/Examples/flow-sw/run-navier-stokes-sw
diff --git a/examples/flow-sw/BoundaryConditionsBoiler.h b/src/Examples/flow-vl/BoundaryConditionsBoiler.h
similarity index 87%
rename from examples/flow-sw/BoundaryConditionsBoiler.h
rename to src/Examples/flow-vl/BoundaryConditionsBoiler.h
index 0cba68d7fa1a8689df50b0ea9016b511ad126918..f61f988da66f3f7476827c8f8853f4d6dbf67217 100644
--- a/examples/flow-sw/BoundaryConditionsBoiler.h
+++ b/src/Examples/flow-vl/BoundaryConditionsBoiler.h
@@ -29,14 +29,14 @@ class BoundaryConditionsBoiler
       typedef TNL::Operators::EnergyBoundaryConditionsBoiler< MeshType, FunctionType, RealType, IndexType > EnergyBoundaryConditionsType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
 
-      typedef SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
-      typedef SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-sw/BoundaryConditionsCavity.h b/src/Examples/flow-vl/BoundaryConditionsCavity.h
similarity index 87%
rename from examples/flow-sw/BoundaryConditionsCavity.h
rename to src/Examples/flow-vl/BoundaryConditionsCavity.h
index 8a42faea17fa4cbdfa40a0b27533bca567d79206..bbae2d3e93c65383db11a53a3b16d734fc3131e2 100644
--- a/examples/flow-sw/BoundaryConditionsCavity.h
+++ b/src/Examples/flow-vl/BoundaryConditionsCavity.h
@@ -29,14 +29,14 @@ class BoundaryConditionsCavity
       typedef TNL::Operators::EnergyBoundaryConditionsCavity< MeshType, FunctionType, RealType, IndexType > EnergyBoundaryConditionsType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
 
-      typedef SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
-      typedef SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-vl/CMakeLists.txt b/src/Examples/flow-vl/CMakeLists.txt
similarity index 100%
rename from examples/flow-vl/CMakeLists.txt
rename to src/Examples/flow-vl/CMakeLists.txt
diff --git a/examples/flow-vl/CompressibleConservativeVariables.h b/src/Examples/flow-vl/CompressibleConservativeVariables.h
similarity index 93%
rename from examples/flow-vl/CompressibleConservativeVariables.h
rename to src/Examples/flow-vl/CompressibleConservativeVariables.h
index a3afc845366f8df17b41c5affc5a4e49d5da052a..01e820686e98781a3267c4526e8e7c6449218415 100644
--- a/examples/flow-vl/CompressibleConservativeVariables.h
+++ b/src/Examples/flow-vl/CompressibleConservativeVariables.h
@@ -13,7 +13,7 @@
 
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -28,9 +28,9 @@ class CompressibleConservativeVariables
       typedef typename MeshType::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshType > MeshPointer;      
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > MomentumFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;      
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > MomentumFieldPointer;
       
       CompressibleConservativeVariables(){};
       
@@ -144,4 +144,4 @@ class CompressibleConservativeVariables
       
 };
 
-} // namespace TN
\ No newline at end of file
+} // namespace TNL
diff --git a/examples/flow/DensityBoundaryConditionBoiler.h b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow/DensityBoundaryConditionBoiler.h
rename to src/Examples/flow-vl/DensityBoundaryConditionBoiler.h
index c3bae7e3d961ab4a6f6dddb287cc1a23184f1c87..6231f6780e24e3090c83832c9d47534a9c6104a8 100644
--- a/examples/flow/DensityBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
    template< typename EntityType,
@@ -237,8 +237,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -387,8 +387,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/DensityBoundaryConditionCavity.h b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-vl/DensityBoundaryConditionCavity.h
rename to src/Examples/flow-vl/DensityBoundaryConditionCavity.h
index a2d34ce540b8f8806b0ae0bf7e31d357c01ed647..18eaff1101eccc7733eb5978b48807be82bba916 100644
--- a/examples/flow-vl/DensityBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
    template< typename EntityType,
@@ -237,8 +237,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -384,8 +384,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h
similarity index 98%
rename from examples/flow/EnergyBoundaryConditionBoiler.h
rename to src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h
index fe227d68f81a0df6b980d429cfb23472c0f97bc2..a99fdf0157bfcbca614374e8472ab9fe8a3b4f58 100644
--- a/examples/flow/EnergyBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h
@@ -116,8 +116,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -255,8 +255,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -535,8 +535,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/EnergyBoundaryConditionCavity.h b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-sw/EnergyBoundaryConditionCavity.h
rename to src/Examples/flow-vl/EnergyBoundaryConditionCavity.h
index ca3fbe01dedcfdb7bc0a40777ff93e264fcfdec0..3b49cd56e5ab6901716c86115561c26fbbbff973 100644
--- a/examples/flow-sw/EnergyBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h
@@ -116,8 +116,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -255,8 +255,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -479,8 +479,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/inviscid-flow-vl/LaxFridrichs.h b/src/Examples/flow-vl/LaxFridrichs.h
similarity index 88%
rename from examples/inviscid-flow-vl/LaxFridrichs.h
rename to src/Examples/flow-vl/LaxFridrichs.h
index cdf32899f69eb797a6d9a18a52b84c09709867bf..c56d20aed9947bd16796a5bf7ceb22adb237bf0c 100644
--- a/examples/inviscid-flow-vl/LaxFridrichs.h
+++ b/src/Examples/flow-vl/LaxFridrichs.h
@@ -43,15 +43,15 @@ class LaxFridrichs
       typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-vl/LaxFridrichsContinuity.h b/src/Examples/flow-vl/LaxFridrichsContinuity.h
similarity index 99%
rename from examples/flow-vl/LaxFridrichsContinuity.h
rename to src/Examples/flow-vl/LaxFridrichsContinuity.h
index 45ad4d52b12d402365a40cac043d5525e230cecb..82747cd18220efc01bc2d68e0247c01723c29fd0 100644
--- a/examples/flow-vl/LaxFridrichsContinuity.h
+++ b/src/Examples/flow-vl/LaxFridrichsContinuity.h
@@ -34,7 +34,7 @@ class LaxFridrichsContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-sw/LaxFridrichsEnergy.h b/src/Examples/flow-vl/LaxFridrichsEnergy.h
similarity index 98%
rename from examples/inviscid-flow-sw/LaxFridrichsEnergy.h
rename to src/Examples/flow-vl/LaxFridrichsEnergy.h
index 18c824762b8c677253dbd4e494be7ad3aea7e769..03019ed23c85f82ee489c95d8173c0f100cff3c8 100644
--- a/examples/inviscid-flow-sw/LaxFridrichsEnergy.h
+++ b/src/Examples/flow-vl/LaxFridrichsEnergy.h
@@ -30,8 +30,8 @@ class LaxFridrichsEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-vl/LaxFridrichsMomentumBase.h b/src/Examples/flow-vl/LaxFridrichsMomentumBase.h
similarity index 92%
rename from examples/flow-vl/LaxFridrichsMomentumBase.h
rename to src/Examples/flow-vl/LaxFridrichsMomentumBase.h
index 67dae9fdf8256cecf032a731dd5d616d715ca0fe..cc2561748968a2bd808fa434d4f3d87d41765f45 100644
--- a/examples/flow-vl/LaxFridrichsMomentumBase.h
+++ b/src/Examples/flow-vl/LaxFridrichsMomentumBase.h
@@ -28,8 +28,8 @@ class LaxFridrichsMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsMomentumBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-vl/LaxFridrichsMomentumX.h b/src/Examples/flow-vl/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/flow-vl/LaxFridrichsMomentumX.h
rename to src/Examples/flow-vl/LaxFridrichsMomentumX.h
diff --git a/examples/flow-vl/LaxFridrichsMomentumY.h b/src/Examples/flow-vl/LaxFridrichsMomentumY.h
similarity index 100%
rename from examples/flow-vl/LaxFridrichsMomentumY.h
rename to src/Examples/flow-vl/LaxFridrichsMomentumY.h
diff --git a/examples/flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/flow-vl/LaxFridrichsMomentumZ.h
rename to src/Examples/flow-vl/LaxFridrichsMomentumZ.h
diff --git a/examples/flow/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow/MomentumXBoundaryConditionBoiler.h
rename to src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h
index 823ec475a570074f2f147a5b899606f203b76354..dfe63e07623a6fdb6ca7ebb7da8ab445d9505372 100644
--- a/examples/flow/MomentumXBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -421,8 +421,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow/MomentumXBoundaryConditionCavity.h
rename to src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h
index b78731382731f3aea95b64257c7330af15d78a99..07abfdbeb940039555ac2799d0ef374ca26faff0 100644
--- a/examples/flow/MomentumXBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -410,8 +410,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-sw/MomentumYBoundaryConditionBoiler.h
rename to src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h
index 76f3ff05735d1e813ffef7105509986a14036de8..83b6282ddd50033f32a382f0b48f5abe7347ccaa 100644
--- a/examples/flow-sw/MomentumYBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -416,8 +416,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow/MomentumYBoundaryConditionCavity.h
rename to src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h
index afce8239b71ef68697a395177ae28c0be21dc788..a83dd653f92328814b8d0746bc45c8775552a310 100644
--- a/examples/flow/MomentumYBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -407,8 +407,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-sw/MomentumZBoundaryConditionBoiler.h
rename to src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h
index 188aaa9851aaa93eeadd0de85d26554d562b66df..9d887857ce97f916dcfaccd0208138afc200afd1 100644
--- a/examples/flow-sw/MomentumZBoundaryConditionBoiler.h
+++ b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -397,8 +397,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-sw/MomentumZBoundaryConditionCavity.h
rename to src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h
index 1942cd58935395f340ea99ab0be0b74f1aee0c69..5fe6f22e5945513c9e9e86d835256ef84e27c054 100644
--- a/examples/flow-sw/MomentumZBoundaryConditionCavity.h
+++ b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -397,8 +397,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/PhysicalVariablesGetter.h b/src/Examples/flow-vl/PhysicalVariablesGetter.h
similarity index 90%
rename from examples/flow-sw/PhysicalVariablesGetter.h
rename to src/Examples/flow-vl/PhysicalVariablesGetter.h
index f1ba6bd1222b8653faeaac041606c101a071e188..2af0a02b820db183fa32669dcecfe63f7f006990 100644
--- a/examples/flow-sw/PhysicalVariablesGetter.h
+++ b/src/Examples/flow-vl/PhysicalVariablesGetter.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
 #include <TNL/Functions/MeshFunctionEvaluator.h>
@@ -30,11 +30,11 @@ class PhysicalVariablesGetter
       static const int Dimensions = MeshType::getMeshDimension();
       
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
       {
@@ -99,7 +99,7 @@ class PhysicalVariablesGetter
          Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
          for( int i = 0; i < Dimensions; i++ )
          {
-            SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
+            Pointers::SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
                                                                         ( *conservativeVariables->getMomentum() )[ i ] );
             evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
          }
@@ -110,7 +110,7 @@ class PhysicalVariablesGetter
                         MeshFunctionPointer& pressure )
       {
          Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
-         SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
+         Pointers::SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
                                                                      conservativeVariables->getEnergy(),
                                                                      conservativeVariables->getMomentum(),
                                                                      gamma );
diff --git a/examples/flow-vl/RiemannProblemInitialCondition.h b/src/Examples/flow-vl/RiemannProblemInitialCondition.h
similarity index 99%
rename from examples/flow-vl/RiemannProblemInitialCondition.h
rename to src/Examples/flow-vl/RiemannProblemInitialCondition.h
index 640e4b6d1e59bca19fd09e0f2374bd88f5b82cd5..dfde32625973ef72161ebed80d633a089666049f 100644
--- a/examples/flow-vl/RiemannProblemInitialCondition.h
+++ b/src/Examples/flow-vl/RiemannProblemInitialCondition.h
@@ -38,7 +38,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 1,MeshReal, Device, Me
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -195,7 +195,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 2, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -415,7 +415,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 3, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -768,7 +768,7 @@ class RiemannProblemInitialCondition
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
       
       RiemannProblemInitialCondition()
@@ -1342,7 +1342,7 @@ class RiemannProblemInitialCondition
          typedef Functions::Analytic::VectorNorm< Dimensions, RealType > VectorNormType;
          typedef Operators::Analytic::Sign< Dimensions, RealType > SignType;
          typedef Functions::OperatorFunction< SignType, VectorNormType > InitialConditionType;
-         typedef SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
+         typedef Pointers::SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
          
          InitialConditionPointer initialCondition;
          initialCondition->getFunction().setCenter( center );
diff --git a/examples/flow-vl/Upwind.h b/src/Examples/flow-vl/Upwind.h
similarity index 89%
rename from examples/flow-vl/Upwind.h
rename to src/Examples/flow-vl/Upwind.h
index cf337144b1b1f2b7e163c56c6e632cb28f495f47..5aee91b62520fa9afd1c3427d330b6dab0e4ea98 100644
--- a/examples/flow-vl/Upwind.h
+++ b/src/Examples/flow-vl/Upwind.h
@@ -43,15 +43,15 @@ class Upwind
       typedef UpwindMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef UpwindEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-vl/UpwindContinuity.h b/src/Examples/flow-vl/UpwindContinuity.h
similarity index 98%
rename from examples/flow-vl/UpwindContinuity.h
rename to src/Examples/flow-vl/UpwindContinuity.h
index 1281a495976856b99b2b7e0585d103951a66f469..20bae4fbb49fe4d1510f95f0ef4c2404873903f7 100644
--- a/examples/flow-vl/UpwindContinuity.h
+++ b/src/Examples/flow-vl/UpwindContinuity.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -34,8 +34,8 @@ class UpwindContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       static String getType()
       {
diff --git a/examples/flow-vl/UpwindEnergy.h b/src/Examples/flow-vl/UpwindEnergy.h
similarity index 99%
rename from examples/flow-vl/UpwindEnergy.h
rename to src/Examples/flow-vl/UpwindEnergy.h
index b18fc7090dc9e25c30349b3813387c06fce49624..8fa7a046a24d37c832746d6f17396e39582f7f5e 100644
--- a/examples/flow-vl/UpwindEnergy.h
+++ b/src/Examples/flow-vl/UpwindEnergy.h
@@ -30,8 +30,8 @@ class UpwindEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-vl/UpwindMomentumBase.h b/src/Examples/flow-vl/UpwindMomentumBase.h
similarity index 96%
rename from examples/flow-vl/UpwindMomentumBase.h
rename to src/Examples/flow-vl/UpwindMomentumBase.h
index 41e96b3450b4b8c82c299ce434790b4def83b70d..706008b81880539d011cfcb37acb46ff289bf8f9 100644
--- a/examples/flow-vl/UpwindMomentumBase.h
+++ b/src/Examples/flow-vl/UpwindMomentumBase.h
@@ -28,8 +28,8 @@ class UpwindMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
 
       void setTau(const Real& tau)
diff --git a/examples/flow-vl/UpwindMomentumX.h b/src/Examples/flow-vl/UpwindMomentumX.h
similarity index 100%
rename from examples/flow-vl/UpwindMomentumX.h
rename to src/Examples/flow-vl/UpwindMomentumX.h
diff --git a/examples/flow-vl/UpwindMomentumY.h b/src/Examples/flow-vl/UpwindMomentumY.h
similarity index 100%
rename from examples/flow-vl/UpwindMomentumY.h
rename to src/Examples/flow-vl/UpwindMomentumY.h
diff --git a/examples/flow-vl/UpwindMomentumZ.h b/src/Examples/flow-vl/UpwindMomentumZ.h
similarity index 100%
rename from examples/flow-vl/UpwindMomentumZ.h
rename to src/Examples/flow-vl/UpwindMomentumZ.h
diff --git a/examples/flow-vl/navierStokes.cpp b/src/Examples/flow-vl/navierStokes.cpp
similarity index 100%
rename from examples/flow-vl/navierStokes.cpp
rename to src/Examples/flow-vl/navierStokes.cpp
diff --git a/examples/flow-vl/navierStokes.cu b/src/Examples/flow-vl/navierStokes.cu
similarity index 100%
rename from examples/flow-vl/navierStokes.cu
rename to src/Examples/flow-vl/navierStokes.cu
diff --git a/examples/flow-vl/navierStokes.h b/src/Examples/flow-vl/navierStokes.h
similarity index 100%
rename from examples/flow-vl/navierStokes.h
rename to src/Examples/flow-vl/navierStokes.h
diff --git a/examples/flow-vl/navierStokesBuildConfigTag.h b/src/Examples/flow-vl/navierStokesBuildConfigTag.h
similarity index 100%
rename from examples/flow-vl/navierStokesBuildConfigTag.h
rename to src/Examples/flow-vl/navierStokesBuildConfigTag.h
diff --git a/examples/flow-sw/navierStokesProblem.h b/src/Examples/flow-vl/navierStokesProblem.h
similarity index 84%
rename from examples/flow-sw/navierStokesProblem.h
rename to src/Examples/flow-vl/navierStokesProblem.h
index 2658adf8d41318950ed6257f5582e5a8ae020b41..84ca7d7b86de812e8167812387c510ea221583bf 100644
--- a/examples/flow-sw/navierStokesProblem.h
+++ b/src/Examples/flow-vl/navierStokesProblem.h
@@ -42,19 +42,20 @@ class navierStokesProblem:
       using typename BaseType::MeshPointer;
       using typename BaseType::DofVectorType;
       using typename BaseType::DofVectorPointer;
+
       static const int Dimensions = Mesh::getMeshDimension();      
 
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      typedef SharedPointer< InviscidOperators > InviscidOperatorsPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< InviscidOperators > InviscidOperatorsPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
-      
+
       static String getTypeStatic();
 
       String getPrologHeader() const;
@@ -83,6 +84,9 @@ class navierStokesProblem:
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      void applyBoundaryConditions( const RealType& time, // placeholder, not implemented yet: neither parameter is used
+                                    DofVectorPointer& dofs ) { TNL_ASSERT( false, "TODO:Implement")};      // body only trips TNL_ASSERT; NOTE(review): presumably a silent no-op when assertions are compiled out -- confirm
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/flow-vl/navierStokesProblem_impl.h b/src/Examples/flow-vl/navierStokesProblem_impl.h
similarity index 100%
rename from examples/flow-vl/navierStokesProblem_impl.h
rename to src/Examples/flow-vl/navierStokesProblem_impl.h
diff --git a/examples/flow-vl/navierStokesRhs.h b/src/Examples/flow-vl/navierStokesRhs.h
similarity index 100%
rename from examples/flow-vl/navierStokesRhs.h
rename to src/Examples/flow-vl/navierStokesRhs.h
diff --git a/examples/flow-vl/run-navier-stokes-vl b/src/Examples/flow-vl/run-navier-stokes-vl
similarity index 100%
rename from examples/flow-vl/run-navier-stokes-vl
rename to src/Examples/flow-vl/run-navier-stokes-vl
diff --git a/examples/flow/BoundaryConditionsBoiler.h b/src/Examples/flow/BoundaryConditionsBoiler.h
similarity index 87%
rename from examples/flow/BoundaryConditionsBoiler.h
rename to src/Examples/flow/BoundaryConditionsBoiler.h
index 0cba68d7fa1a8689df50b0ea9016b511ad126918..f61f988da66f3f7476827c8f8853f4d6dbf67217 100644
--- a/examples/flow/BoundaryConditionsBoiler.h
+++ b/src/Examples/flow/BoundaryConditionsBoiler.h
@@ -29,14 +29,14 @@ class BoundaryConditionsBoiler
       typedef TNL::Operators::EnergyBoundaryConditionsBoiler< MeshType, FunctionType, RealType, IndexType > EnergyBoundaryConditionsType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
 
-      typedef SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
-      typedef SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-vl/BoundaryConditionsCavity.h b/src/Examples/flow/BoundaryConditionsCavity.h
similarity index 87%
rename from examples/flow-vl/BoundaryConditionsCavity.h
rename to src/Examples/flow/BoundaryConditionsCavity.h
index 8a42faea17fa4cbdfa40a0b27533bca567d79206..bbae2d3e93c65383db11a53a3b16d734fc3131e2 100644
--- a/examples/flow-vl/BoundaryConditionsCavity.h
+++ b/src/Examples/flow/BoundaryConditionsCavity.h
@@ -29,14 +29,14 @@ class BoundaryConditionsCavity
       typedef TNL::Operators::EnergyBoundaryConditionsCavity< MeshType, FunctionType, RealType, IndexType > EnergyBoundaryConditionsType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
 
-      typedef SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
-      typedef SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
-      typedef SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DensityBoundaryConditionsType > DensityBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumXBoundaryConditionsType > MomentumXBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumYBoundaryConditionsType > MomentumYBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< MomentumZBoundaryConditionsType > MomentumZBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< EnergyBoundaryConditionsType > EnergyBoundaryConditionsTypePointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow/CMakeLists.txt b/src/Examples/flow/CMakeLists.txt
similarity index 100%
rename from examples/flow/CMakeLists.txt
rename to src/Examples/flow/CMakeLists.txt
diff --git a/examples/flow-sw/CompressibleConservativeVariables.h b/src/Examples/flow/CompressibleConservativeVariables.h
similarity index 93%
rename from examples/flow-sw/CompressibleConservativeVariables.h
rename to src/Examples/flow/CompressibleConservativeVariables.h
index a3afc845366f8df17b41c5affc5a4e49d5da052a..01e820686e98781a3267c4526e8e7c6449218415 100644
--- a/examples/flow-sw/CompressibleConservativeVariables.h
+++ b/src/Examples/flow/CompressibleConservativeVariables.h
@@ -13,7 +13,7 @@
 
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -28,9 +28,9 @@ class CompressibleConservativeVariables
       typedef typename MeshType::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshType > MeshPointer;      
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > MomentumFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;      
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > MomentumFieldPointer;
       
       CompressibleConservativeVariables(){};
       
@@ -144,4 +144,4 @@ class CompressibleConservativeVariables
       
 };
 
-} // namespace TN
\ No newline at end of file
+} // namespace TNL
diff --git a/examples/flow-vl/DensityBoundaryConditionBoiler.h b/src/Examples/flow/DensityBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-vl/DensityBoundaryConditionBoiler.h
rename to src/Examples/flow/DensityBoundaryConditionBoiler.h
index c3bae7e3d961ab4a6f6dddb287cc1a23184f1c87..6231f6780e24e3090c83832c9d47534a9c6104a8 100644
--- a/examples/flow-vl/DensityBoundaryConditionBoiler.h
+++ b/src/Examples/flow/DensityBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
    template< typename EntityType,
@@ -237,8 +237,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -387,8 +387,8 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsBoilerBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/DensityBoundaryConditionCavity.h b/src/Examples/flow/DensityBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow/DensityBoundaryConditionCavity.h
rename to src/Examples/flow/DensityBoundaryConditionCavity.h
index 06515a88c8ebf98ac9377fffe1e84e8ccce325b3..c753d324a288a331dbfb9d73e341fe962f39889e 100644
--- a/examples/flow/DensityBoundaryConditionCavity.h
+++ b/src/Examples/flow/DensityBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn
    typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
    template< typename EntityType,
@@ -237,8 +237,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -384,8 +384,8 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn
       typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef DensityBoundaryConditionsCavityBase< Function > BaseType;   
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/EnergyBoundaryConditionBoiler.h b/src/Examples/flow/EnergyBoundaryConditionBoiler.h
similarity index 98%
rename from examples/flow-sw/EnergyBoundaryConditionBoiler.h
rename to src/Examples/flow/EnergyBoundaryConditionBoiler.h
index fe227d68f81a0df6b980d429cfb23472c0f97bc2..a99fdf0157bfcbca614374e8472ab9fe8a3b4f58 100644
--- a/examples/flow-sw/EnergyBoundaryConditionBoiler.h
+++ b/src/Examples/flow/EnergyBoundaryConditionBoiler.h
@@ -116,8 +116,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -255,8 +255,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -535,8 +535,8 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/EnergyBoundaryConditionCavity.h b/src/Examples/flow/EnergyBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow/EnergyBoundaryConditionCavity.h
rename to src/Examples/flow/EnergyBoundaryConditionCavity.h
index 74ea75a70f76ee7b19b97516d609cd5ea98f3a4a..60e55f4240ed1fcb8bc63e494c01faf61f899568 100644
--- a/examples/flow/EnergyBoundaryConditionCavity.h
+++ b/src/Examples/flow/EnergyBoundaryConditionCavity.h
@@ -116,8 +116,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd
    typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -255,8 +255,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -479,8 +479,8 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd
       typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef EnergyBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/LaxFridrichs.h b/src/Examples/flow/LaxFridrichs.h
similarity index 89%
rename from examples/flow/LaxFridrichs.h
rename to src/Examples/flow/LaxFridrichs.h
index 34fbda09a259a7cbab5a6813737b63466ac23df0..1949bfe974fb3a80c7369abfc208ad89edeb7d47 100644
--- a/examples/flow/LaxFridrichs.h
+++ b/src/Examples/flow/LaxFridrichs.h
@@ -43,15 +43,15 @@ class LaxFridrichs
       typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow/LaxFridrichsContinuity.h b/src/Examples/flow/LaxFridrichsContinuity.h
similarity index 99%
rename from examples/flow/LaxFridrichsContinuity.h
rename to src/Examples/flow/LaxFridrichsContinuity.h
index d821224f1142bad24ea04a9caa854c8ccfcbd69e..bf3cc45ece7877291a53cb460dd874fa77bbd250 100644
--- a/examples/flow/LaxFridrichsContinuity.h
+++ b/src/Examples/flow/LaxFridrichsContinuity.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -34,7 +34,7 @@ class LaxFridrichsContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow/LaxFridrichsContinuityEuler.h b/src/Examples/flow/LaxFridrichsContinuityEuler.h
similarity index 99%
rename from examples/flow/LaxFridrichsContinuityEuler.h
rename to src/Examples/flow/LaxFridrichsContinuityEuler.h
index d821224f1142bad24ea04a9caa854c8ccfcbd69e..f444a4e2541e76addb5c7a3eba87cf1d946ee4fa 100644
--- a/examples/flow/LaxFridrichsContinuityEuler.h
+++ b/src/Examples/flow/LaxFridrichsContinuityEuler.h
@@ -34,7 +34,7 @@ class LaxFridrichsContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow/LaxFridrichsEnergy.h b/src/Examples/flow/LaxFridrichsEnergy.h
similarity index 99%
rename from examples/flow/LaxFridrichsEnergy.h
rename to src/Examples/flow/LaxFridrichsEnergy.h
index e7cdd91bea2602da3fc27f4af006f99a5bebcc4f..dd940243d7fbaa59ae66d013451cd24c2def8488 100644
--- a/examples/flow/LaxFridrichsEnergy.h
+++ b/src/Examples/flow/LaxFridrichsEnergy.h
@@ -30,8 +30,8 @@ class LaxFridrichsEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow/LaxFridrichsEnergyEuler.h b/src/Examples/flow/LaxFridrichsEnergyEuler.h
similarity index 98%
rename from examples/flow/LaxFridrichsEnergyEuler.h
rename to src/Examples/flow/LaxFridrichsEnergyEuler.h
index 7e326bef1b376c177362334d7d5e1cc8c16945c5..30180639d18c4d36b688eb60d597a571e5647115 100644
--- a/examples/flow/LaxFridrichsEnergyEuler.h
+++ b/src/Examples/flow/LaxFridrichsEnergyEuler.h
@@ -30,8 +30,8 @@ class LaxFridrichsEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow/LaxFridrichsEuler.h b/src/Examples/flow/LaxFridrichsEuler.h
similarity index 88%
rename from examples/flow/LaxFridrichsEuler.h
rename to src/Examples/flow/LaxFridrichsEuler.h
index cdf32899f69eb797a6d9a18a52b84c09709867bf..c56d20aed9947bd16796a5bf7ceb22adb237bf0c 100644
--- a/examples/flow/LaxFridrichsEuler.h
+++ b/src/Examples/flow/LaxFridrichsEuler.h
@@ -43,15 +43,15 @@ class LaxFridrichs
       typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow/LaxFridrichsMomentumBase.h b/src/Examples/flow/LaxFridrichsMomentumBase.h
similarity index 92%
rename from examples/flow/LaxFridrichsMomentumBase.h
rename to src/Examples/flow/LaxFridrichsMomentumBase.h
index 5f02acfe2c474800336d7f7f2911c657e316241c..79005b55e04d27c88dfadbfd28d5171483f601a4 100644
--- a/examples/flow/LaxFridrichsMomentumBase.h
+++ b/src/Examples/flow/LaxFridrichsMomentumBase.h
@@ -28,8 +28,8 @@ class LaxFridrichsMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsMomentumBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow/LaxFridrichsMomentumBaseEuler.h b/src/Examples/flow/LaxFridrichsMomentumBaseEuler.h
similarity index 92%
rename from examples/flow/LaxFridrichsMomentumBaseEuler.h
rename to src/Examples/flow/LaxFridrichsMomentumBaseEuler.h
index 67dae9fdf8256cecf032a731dd5d616d715ca0fe..cc2561748968a2bd808fa434d4f3d87d41765f45 100644
--- a/examples/flow/LaxFridrichsMomentumBaseEuler.h
+++ b/src/Examples/flow/LaxFridrichsMomentumBaseEuler.h
@@ -28,8 +28,8 @@ class LaxFridrichsMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsMomentumBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow/LaxFridrichsMomentumX.h b/src/Examples/flow/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/flow/LaxFridrichsMomentumX.h
rename to src/Examples/flow/LaxFridrichsMomentumX.h
diff --git a/examples/flow/LaxFridrichsMomentumXEuler.h b/src/Examples/flow/LaxFridrichsMomentumXEuler.h
similarity index 100%
rename from examples/flow/LaxFridrichsMomentumXEuler.h
rename to src/Examples/flow/LaxFridrichsMomentumXEuler.h
diff --git a/examples/flow/LaxFridrichsMomentumY.h b/src/Examples/flow/LaxFridrichsMomentumY.h
similarity index 100%
rename from examples/flow/LaxFridrichsMomentumY.h
rename to src/Examples/flow/LaxFridrichsMomentumY.h
diff --git a/examples/flow/LaxFridrichsMomentumYEuler.h b/src/Examples/flow/LaxFridrichsMomentumYEuler.h
similarity index 100%
rename from examples/flow/LaxFridrichsMomentumYEuler.h
rename to src/Examples/flow/LaxFridrichsMomentumYEuler.h
diff --git a/examples/flow/LaxFridrichsMomentumZ.h b/src/Examples/flow/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/flow/LaxFridrichsMomentumZ.h
rename to src/Examples/flow/LaxFridrichsMomentumZ.h
diff --git a/examples/flow/LaxFridrichsMomentumZEuler.h b/src/Examples/flow/LaxFridrichsMomentumZEuler.h
similarity index 100%
rename from examples/flow/LaxFridrichsMomentumZEuler.h
rename to src/Examples/flow/LaxFridrichsMomentumZEuler.h
diff --git a/examples/flow-sw/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-sw/MomentumXBoundaryConditionBoiler.h
rename to src/Examples/flow/MomentumXBoundaryConditionBoiler.h
index 823ec475a570074f2f147a5b899606f203b76354..dfe63e07623a6fdb6ca7ebb7da8ab445d9505372 100644
--- a/examples/flow-sw/MomentumXBoundaryConditionBoiler.h
+++ b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -421,8 +421,8 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/MomentumXBoundaryConditionCavity.h b/src/Examples/flow/MomentumXBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-sw/MomentumXBoundaryConditionCavity.h
rename to src/Examples/flow/MomentumXBoundaryConditionCavity.h
index b78731382731f3aea95b64257c7330af15d78a99..07abfdbeb940039555ac2799d0ef374ca26faff0 100644
--- a/examples/flow-sw/MomentumXBoundaryConditionCavity.h
+++ b/src/Examples/flow/MomentumXBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -410,8 +410,8 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow/MomentumYBoundaryConditionBoiler.h
rename to src/Examples/flow/MomentumYBoundaryConditionBoiler.h
index 76f3ff05735d1e813ffef7105509986a14036de8..83b6282ddd50033f32a382f0b48f5abe7347ccaa 100644
--- a/examples/flow/MomentumYBoundaryConditionBoiler.h
+++ b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -416,8 +416,8 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-sw/MomentumYBoundaryConditionCavity.h b/src/Examples/flow/MomentumYBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow-sw/MomentumYBoundaryConditionCavity.h
rename to src/Examples/flow/MomentumYBoundaryConditionCavity.h
index afce8239b71ef68697a395177ae28c0be21dc788..a83dd653f92328814b8d0746bc45c8775552a310 100644
--- a/examples/flow-sw/MomentumYBoundaryConditionCavity.h
+++ b/src/Examples/flow/MomentumYBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -407,8 +407,8 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h
similarity index 97%
rename from examples/flow-vl/MomentumZBoundaryConditionBoiler.h
rename to src/Examples/flow/MomentumZBoundaryConditionBoiler.h
index 188aaa9851aaa93eeadd0de85d26554d562b66df..9d887857ce97f916dcfaccd0208138afc200afd1 100644
--- a/examples/flow-vl/MomentumZBoundaryConditionBoiler.h
+++ b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h
@@ -114,8 +114,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -397,8 +397,8 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow/MomentumZBoundaryConditionCavity.h b/src/Examples/flow/MomentumZBoundaryConditionCavity.h
similarity index 97%
rename from examples/flow/MomentumZBoundaryConditionCavity.h
rename to src/Examples/flow/MomentumZBoundaryConditionCavity.h
index 1942cd58935395f340ea99ab0be0b74f1aee0c69..5fe6f22e5945513c9e9e86d835256ef84e27c054 100644
--- a/examples/flow/MomentumZBoundaryConditionCavity.h
+++ b/src/Examples/flow/MomentumZBoundaryConditionCavity.h
@@ -114,8 +114,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh
    typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
    typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
    typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-   typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-   typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+   typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
    template< typename EntityType,
              typename MeshFunction >
@@ -245,8 +245,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
 
       template< typename EntityType,
@@ -397,8 +397,8 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh
       typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType;
       typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType;  
       typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType;
-      typedef SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; 
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType,
                 typename MeshFunction >
diff --git a/examples/flow-vl/PhysicalVariablesGetter.h b/src/Examples/flow/PhysicalVariablesGetter.h
similarity index 90%
rename from examples/flow-vl/PhysicalVariablesGetter.h
rename to src/Examples/flow/PhysicalVariablesGetter.h
index f1ba6bd1222b8653faeaac041606c101a071e188..2af0a02b820db183fa32669dcecfe63f7f006990 100644
--- a/examples/flow-vl/PhysicalVariablesGetter.h
+++ b/src/Examples/flow/PhysicalVariablesGetter.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
 #include <TNL/Functions/MeshFunctionEvaluator.h>
@@ -30,11 +30,11 @@ class PhysicalVariablesGetter
       static const int Dimensions = MeshType::getMeshDimension();
       
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
       {
@@ -99,7 +99,7 @@ class PhysicalVariablesGetter
          Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
          for( int i = 0; i < Dimensions; i++ )
          {
-            SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
+            Pointers::SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
                                                                         ( *conservativeVariables->getMomentum() )[ i ] );
             evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
          }
@@ -110,7 +110,7 @@ class PhysicalVariablesGetter
                         MeshFunctionPointer& pressure )
       {
          Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
-         SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
+         Pointers::SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
                                                                      conservativeVariables->getEnergy(),
                                                                      conservativeVariables->getMomentum(),
                                                                      gamma );
diff --git a/examples/flow/RiemannProblemInitialCondition.h b/src/Examples/flow/RiemannProblemInitialCondition.h
similarity index 99%
rename from examples/flow/RiemannProblemInitialCondition.h
rename to src/Examples/flow/RiemannProblemInitialCondition.h
index a712934143c551899e2e005a98e98eea6f1adf7b..481a45bba9601b3f402e765ab0f830a65bdb7010 100644
--- a/examples/flow/RiemannProblemInitialCondition.h
+++ b/src/Examples/flow/RiemannProblemInitialCondition.h
@@ -38,7 +38,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 1,MeshReal, Device, Me
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -195,7 +195,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 2, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -415,7 +415,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 3, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -768,7 +768,7 @@ class RiemannProblemInitialCondition
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
       
       RiemannProblemInitialCondition()
@@ -1338,7 +1338,7 @@ class RiemannProblemInitialCondition
          typedef Functions::Analytic::VectorNorm< Dimensions, RealType > VectorNormType;
          typedef Operators::Analytic::Sign< Dimensions, RealType > SignType;
          typedef Functions::OperatorFunction< SignType, VectorNormType > InitialConditionType;
-         typedef SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
+         typedef Pointers::SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
          
          InitialConditionPointer initialCondition;
          initialCondition->getFunction().setCenter( center );
diff --git a/examples/flow/navierStokes.cpp b/src/Examples/flow/navierStokes.cpp
similarity index 100%
rename from examples/flow/navierStokes.cpp
rename to src/Examples/flow/navierStokes.cpp
diff --git a/examples/flow/navierStokes.cu b/src/Examples/flow/navierStokes.cu
similarity index 100%
rename from examples/flow/navierStokes.cu
rename to src/Examples/flow/navierStokes.cu
diff --git a/examples/flow/navierStokes.h b/src/Examples/flow/navierStokes.h
similarity index 100%
rename from examples/flow/navierStokes.h
rename to src/Examples/flow/navierStokes.h
diff --git a/examples/flow/navierStokesBuildConfigTag.h b/src/Examples/flow/navierStokesBuildConfigTag.h
similarity index 100%
rename from examples/flow/navierStokesBuildConfigTag.h
rename to src/Examples/flow/navierStokesBuildConfigTag.h
diff --git a/examples/flow/navierStokesProblem.h b/src/Examples/flow/navierStokesProblem.h
similarity index 84%
rename from examples/flow/navierStokesProblem.h
rename to src/Examples/flow/navierStokesProblem.h
index 9558c29a2bf3db86def7d981aaea49ac295fcb65..76c08a038ea9a20e825b63efab37279ef6d89add 100644
--- a/examples/flow/navierStokesProblem.h
+++ b/src/Examples/flow/navierStokesProblem.h
@@ -48,12 +48,12 @@ class navierStokesProblem:
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      typedef SharedPointer< InviscidOperators > InviscidOperatorsPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< InviscidOperators > InviscidOperatorsPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
       static String getTypeStatic();
@@ -84,6 +84,9 @@ class navierStokesProblem:
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      void applyBoundaryConditions( const RealType& time,
+                                    DofVectorPointer& dofs ) { TNL_ASSERT( false, "TODO:Implement")};
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/flow/navierStokesProblem_impl.h b/src/Examples/flow/navierStokesProblem_impl.h
similarity index 100%
rename from examples/flow/navierStokesProblem_impl.h
rename to src/Examples/flow/navierStokesProblem_impl.h
diff --git a/examples/flow/navierStokesRhs.h b/src/Examples/flow/navierStokesRhs.h
similarity index 100%
rename from examples/flow/navierStokesRhs.h
rename to src/Examples/flow/navierStokesRhs.h
diff --git a/examples/flow/run-navier-stokes b/src/Examples/flow/run-navier-stokes
similarity index 100%
rename from examples/flow/run-navier-stokes
rename to src/Examples/flow/run-navier-stokes
diff --git a/examples/heat-equation/CMakeLists.txt b/src/Examples/heat-equation/CMakeLists.txt
similarity index 100%
rename from examples/heat-equation/CMakeLists.txt
rename to src/Examples/heat-equation/CMakeLists.txt
diff --git a/examples/heat-equation/HeatEquationBuildConfigTag.h b/src/Examples/heat-equation/HeatEquationBuildConfigTag.h
similarity index 100%
rename from examples/heat-equation/HeatEquationBuildConfigTag.h
rename to src/Examples/heat-equation/HeatEquationBuildConfigTag.h
diff --git a/examples/heat-equation/tnl-heat-equation-eoc.cpp b/src/Examples/heat-equation/tnl-heat-equation-eoc.cpp
similarity index 100%
rename from examples/heat-equation/tnl-heat-equation-eoc.cpp
rename to src/Examples/heat-equation/tnl-heat-equation-eoc.cpp
diff --git a/examples/heat-equation/tnl-heat-equation-eoc.cu b/src/Examples/heat-equation/tnl-heat-equation-eoc.cu
similarity index 100%
rename from examples/heat-equation/tnl-heat-equation-eoc.cu
rename to src/Examples/heat-equation/tnl-heat-equation-eoc.cu
diff --git a/examples/heat-equation/tnl-heat-equation-eoc.h b/src/Examples/heat-equation/tnl-heat-equation-eoc.h
similarity index 100%
rename from examples/heat-equation/tnl-heat-equation-eoc.h
rename to src/Examples/heat-equation/tnl-heat-equation-eoc.h
diff --git a/examples/heat-equation/tnl-heat-equation.cpp b/src/Examples/heat-equation/tnl-heat-equation.cpp
similarity index 100%
rename from examples/heat-equation/tnl-heat-equation.cpp
rename to src/Examples/heat-equation/tnl-heat-equation.cpp
diff --git a/examples/heat-equation/tnl-heat-equation.cu b/src/Examples/heat-equation/tnl-heat-equation.cu
similarity index 100%
rename from examples/heat-equation/tnl-heat-equation.cu
rename to src/Examples/heat-equation/tnl-heat-equation.cu
diff --git a/examples/heat-equation/tnl-heat-equation.h b/src/Examples/heat-equation/tnl-heat-equation.h
similarity index 100%
rename from examples/heat-equation/tnl-heat-equation.h
rename to src/Examples/heat-equation/tnl-heat-equation.h
diff --git a/examples/heat-equation/tnl-run-heat-equation b/src/Examples/heat-equation/tnl-run-heat-equation
similarity index 100%
rename from examples/heat-equation/tnl-run-heat-equation
rename to src/Examples/heat-equation/tnl-run-heat-equation
diff --git a/examples/heat-equation/tnl-run-heat-equation-eoc-test b/src/Examples/heat-equation/tnl-run-heat-equation-eoc-test
similarity index 100%
rename from examples/heat-equation/tnl-run-heat-equation-eoc-test
rename to src/Examples/heat-equation/tnl-run-heat-equation-eoc-test
diff --git a/examples/inviscid-flow-sw/CMakeLists.txt b/src/Examples/inviscid-flow-sw/CMakeLists.txt
similarity index 100%
rename from examples/inviscid-flow-sw/CMakeLists.txt
rename to src/Examples/inviscid-flow-sw/CMakeLists.txt
diff --git a/src/Examples/inviscid-flow-sw/CompressibleConservativeVariables.h b/src/Examples/inviscid-flow-sw/CompressibleConservativeVariables.h
new file mode 100644
index 0000000000000000000000000000000000000000..01e820686e98781a3267c4526e8e7c6449218415
--- /dev/null
+++ b/src/Examples/inviscid-flow-sw/CompressibleConservativeVariables.h
@@ -0,0 +1,147 @@
+/***************************************************************************
+                          CompressibleConservativeVariables.h  -  description
+                             -------------------
+    begin                : Feb 12, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+
+#pragma once
+
+#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/VectorField.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+namespace TNL {
+
+template< typename Mesh > // Mesh supplies RealType, DeviceType, IndexType and getMeshDimension()
+class CompressibleConservativeVariables // holds shared pointers to the density, momentum and energy fields
+{
+   public:
+      typedef Mesh MeshType;
+      static const int Dimensions = MeshType::getMeshDimension(); // number of momentum components visited in bind()
+      typedef typename MeshType::RealType RealType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef typename MeshType::IndexType IndexType;
+      typedef Functions::MeshFunction< Mesh > MeshFunctionType;
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType; // also the pointee type of MomentumFieldPointer
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;      
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > MomentumFieldPointer;
+      // Default constructor: density, momentum and energy pointers are default-constructed.
+      CompressibleConservativeVariables(){};
+      // Passes meshPointer to the constructor of each of the three field pointers.
+      CompressibleConservativeVariables( const MeshPointer& meshPointer )
+      : density( meshPointer ),
+        momentum( meshPointer ),
+        //pressure( meshPointer ),
+        energy( meshPointer ){};
+      // Forwards meshPointer to setMesh() of density, momentum and energy.
+      void setMesh( const MeshPointer& meshPointer )
+      {
+         this->density->setMesh( meshPointer );
+         this->momentum->setMesh( meshPointer );
+         //this->pressure.setMesh( meshPointer );
+         this->energy->setMesh( meshPointer );
+      }
+      // Binds the fields to data in the order density, momentum[ 0 .. Dimensions-1 ], energy, starting at offset.
+      template< typename Vector >
+      void bind( const MeshPointer& meshPointer,
+                 const Vector& data,
+                 IndexType offset = 0 )
+      {
+         IndexType currentOffset( offset );
+         this->density->bind( meshPointer, data, currentOffset );
+         currentOffset += this->density->getDofs( meshPointer ); // advance by the density DOF count
+         for( IndexType i = 0; i < Dimensions; i++ )
+         {
+            ( *this->momentum )[ i ]->bind( meshPointer, data, currentOffset );
+            currentOffset += ( *this->momentum )[ i ]->getDofs( meshPointer ); // advance by this component's DOF count
+         }
+         this->energy->bind( meshPointer, data, currentOffset ); // energy is bound last; the offset is not advanced afterwards
+      }
+      // Returns the sum of getDofs() of density, momentum and energy for the given mesh.
+      IndexType getDofs( const MeshPointer& meshPointer ) const
+      {
+         return this->density->getDofs( meshPointer ) + 
+            this->momentum->getDofs( meshPointer ) +
+            this->energy->getDofs( meshPointer );
+      }
+      // Accessors below: getters return a reference to the stored pointer, setters assign the argument to it.
+      MeshFunctionPointer& getDensity()
+      {
+         return this->density;
+      }
+
+      const MeshFunctionPointer& getDensity() const
+      {
+         return this->density;
+      }
+      
+      void setDensity( MeshFunctionPointer& density )
+      {
+         this->density = density;
+      }
+      
+      MomentumFieldPointer& getMomentum()
+      {
+         return this->momentum;
+      }
+      
+      const MomentumFieldPointer& getMomentum() const
+      {
+         return this->momentum;
+      }
+      
+      void setMomentum( MomentumFieldPointer& momentum )
+      {
+         this->momentum = momentum;
+      }
+      // Pressure accessors are commented out; the class declares no pressure member.
+      /*MeshFunctionPointer& getPressure()
+      {
+         return this->pressure;
+      }
+      
+      const MeshFunctionPointer& getPressure() const
+      {
+         return this->pressure;
+      }
+      
+      void setPressure( MeshFunctionPointer& pressure )
+      {
+         this->pressure = pressure;
+      }*/
+      
+      MeshFunctionPointer& getEnergy()
+      {
+         return this->energy;
+      }
+      
+      const MeshFunctionPointer& getEnergy() const
+      {
+         return this->energy;
+      }
+      
+      void setEnergy( MeshFunctionPointer& energy )
+      {
+         this->energy = energy;
+      }
+      // Stub: the body is empty, so velocityField is left unmodified.
+      void getVelocityField( VelocityFieldType& velocityField )
+      {
+         
+      }
+
+   protected:
+      // Stored fields; momentum points to a VectorField with Dimensions components.
+      MeshFunctionPointer density;
+      MomentumFieldPointer momentum;
+      MeshFunctionPointer energy;
+      
+};
+
+} // namespace TNL
diff --git a/examples/inviscid-flow-sw/LaxFridrichs.h b/src/Examples/inviscid-flow-sw/LaxFridrichs.h
similarity index 88%
rename from examples/inviscid-flow-sw/LaxFridrichs.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichs.h
index cdf32899f69eb797a6d9a18a52b84c09709867bf..c56d20aed9947bd16796a5bf7ceb22adb237bf0c 100644
--- a/examples/inviscid-flow-sw/LaxFridrichs.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichs.h
@@ -43,15 +43,15 @@ class LaxFridrichs
       typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/flow-sw/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h
similarity index 99%
rename from examples/flow-sw/LaxFridrichsContinuity.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h
index 45ad4d52b12d402365a40cac043d5525e230cecb..82747cd18220efc01bc2d68e0247c01723c29fd0 100644
--- a/examples/flow-sw/LaxFridrichsContinuity.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h
@@ -34,7 +34,7 @@ class LaxFridrichsContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/flow-sw/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h
similarity index 98%
rename from examples/flow-sw/LaxFridrichsEnergy.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h
index 18c824762b8c677253dbd4e494be7ad3aea7e769..03019ed23c85f82ee489c95d8173c0f100cff3c8 100644
--- a/examples/flow-sw/LaxFridrichsEnergy.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h
@@ -30,8 +30,8 @@ class LaxFridrichsEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-vl/LaxFridrichsMomentumBase.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumBase.h
similarity index 92%
rename from examples/inviscid-flow-vl/LaxFridrichsMomentumBase.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichsMomentumBase.h
index 67dae9fdf8256cecf032a731dd5d616d715ca0fe..cc2561748968a2bd808fa434d4f3d87d41765f45 100644
--- a/examples/inviscid-flow-vl/LaxFridrichsMomentumBase.h
+++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumBase.h
@@ -28,8 +28,8 @@ class LaxFridrichsMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsMomentumBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-sw/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/inviscid-flow-sw/LaxFridrichsMomentumX.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h
diff --git a/examples/inviscid-flow-sw/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h
similarity index 100%
rename from examples/inviscid-flow-sw/LaxFridrichsMomentumY.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h
diff --git a/examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h
rename to src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h
diff --git a/examples/inviscid-flow-sw/PhysicalVariablesGetter.h b/src/Examples/inviscid-flow-sw/PhysicalVariablesGetter.h
similarity index 90%
rename from examples/inviscid-flow-sw/PhysicalVariablesGetter.h
rename to src/Examples/inviscid-flow-sw/PhysicalVariablesGetter.h
index f1ba6bd1222b8653faeaac041606c101a071e188..2af0a02b820db183fa32669dcecfe63f7f006990 100644
--- a/examples/inviscid-flow-sw/PhysicalVariablesGetter.h
+++ b/src/Examples/inviscid-flow-sw/PhysicalVariablesGetter.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
 #include <TNL/Functions/MeshFunctionEvaluator.h>
@@ -30,11 +30,11 @@ class PhysicalVariablesGetter
       static const int Dimensions = MeshType::getMeshDimension();
       
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
       {
@@ -99,7 +99,7 @@ class PhysicalVariablesGetter
          Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
          for( int i = 0; i < Dimensions; i++ )
          {
-            SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
+            Pointers::SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
                                                                         ( *conservativeVariables->getMomentum() )[ i ] );
             evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
          }
@@ -110,7 +110,7 @@ class PhysicalVariablesGetter
                         MeshFunctionPointer& pressure )
       {
          Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
-         SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
+         Pointers::SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
                                                                      conservativeVariables->getEnergy(),
                                                                      conservativeVariables->getMomentum(),
                                                                      gamma );
diff --git a/examples/inviscid-flow-vl/RiemannProblemInitialCondition.h b/src/Examples/inviscid-flow-sw/RiemannProblemInitialCondition.h
similarity index 99%
rename from examples/inviscid-flow-vl/RiemannProblemInitialCondition.h
rename to src/Examples/inviscid-flow-sw/RiemannProblemInitialCondition.h
index 85fbc8af01994495e2c1f2f7d95f8155ee216684..b8e53d60178f44111018dcc67fe5e244eda942d5 100644
--- a/examples/inviscid-flow-vl/RiemannProblemInitialCondition.h
+++ b/src/Examples/inviscid-flow-sw/RiemannProblemInitialCondition.h
@@ -38,7 +38,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 1,MeshReal, Device, Me
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -195,7 +195,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 2, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -415,7 +415,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 3, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -768,7 +768,7 @@ class RiemannProblemInitialCondition
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
       
       RiemannProblemInitialCondition()
@@ -1342,7 +1342,7 @@ class RiemannProblemInitialCondition
          typedef Functions::Analytic::VectorNorm< Dimensions, RealType > VectorNormType;
          typedef Operators::Analytic::Sign< Dimensions, RealType > SignType;
          typedef Functions::OperatorFunction< SignType, VectorNormType > InitialConditionType;
-         typedef SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
+         typedef Pointers::SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
          
          InitialConditionPointer initialCondition;
          initialCondition->getFunction().setCenter( center );
diff --git a/examples/inviscid-flow-sw/Upwind.h b/src/Examples/inviscid-flow-sw/Upwind.h
similarity index 88%
rename from examples/inviscid-flow-sw/Upwind.h
rename to src/Examples/inviscid-flow-sw/Upwind.h
index 263da044a2edaca855b6c6f3fd050bd10cc7c689..529e0aadf7e68c86c97393e122927142014c8c45 100644
--- a/examples/inviscid-flow-sw/Upwind.h
+++ b/src/Examples/inviscid-flow-sw/Upwind.h
@@ -43,15 +43,15 @@ class Upwind
       typedef UpwindMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef UpwindEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/inviscid-flow-sw/UpwindContinuity.h b/src/Examples/inviscid-flow-sw/UpwindContinuity.h
similarity index 98%
rename from examples/inviscid-flow-sw/UpwindContinuity.h
rename to src/Examples/inviscid-flow-sw/UpwindContinuity.h
index f743e6260a62e96cd0d4bb7525f5259c82359f2c..22fc4ffc5d97e2933d7db36ceac3f66d5a33b63f 100644
--- a/examples/inviscid-flow-sw/UpwindContinuity.h
+++ b/src/Examples/inviscid-flow-sw/UpwindContinuity.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -34,8 +34,8 @@ class UpwindContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       static String getType()
       {
diff --git a/examples/inviscid-flow-sw/UpwindEnergy.h b/src/Examples/inviscid-flow-sw/UpwindEnergy.h
similarity index 99%
rename from examples/inviscid-flow-sw/UpwindEnergy.h
rename to src/Examples/inviscid-flow-sw/UpwindEnergy.h
index 822f75ef042659cc1350a18987428915da999efb..39f6090064075b0dd688e6105ffac94c14421cde 100644
--- a/examples/inviscid-flow-sw/UpwindEnergy.h
+++ b/src/Examples/inviscid-flow-sw/UpwindEnergy.h
@@ -30,8 +30,8 @@ class UpwindEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-sw/UpwindMomentumBase.h b/src/Examples/inviscid-flow-sw/UpwindMomentumBase.h
similarity index 97%
rename from examples/inviscid-flow-sw/UpwindMomentumBase.h
rename to src/Examples/inviscid-flow-sw/UpwindMomentumBase.h
index c9a07e2b83a0cdab77e521d4499921002375c266..be1333f810b2e168d6821aa6c1b362ae41d5dade 100644
--- a/examples/inviscid-flow-sw/UpwindMomentumBase.h
+++ b/src/Examples/inviscid-flow-sw/UpwindMomentumBase.h
@@ -28,8 +28,8 @@ class UpwindMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
 
       void setTau(const Real& tau)
diff --git a/examples/inviscid-flow-sw/UpwindMomentumX.h b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h
similarity index 100%
rename from examples/inviscid-flow-sw/UpwindMomentumX.h
rename to src/Examples/inviscid-flow-sw/UpwindMomentumX.h
diff --git a/examples/inviscid-flow-sw/UpwindMomentumY.h b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h
similarity index 100%
rename from examples/inviscid-flow-sw/UpwindMomentumY.h
rename to src/Examples/inviscid-flow-sw/UpwindMomentumY.h
diff --git a/examples/inviscid-flow-sw/UpwindMomentumZ.h b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h
similarity index 100%
rename from examples/inviscid-flow-sw/UpwindMomentumZ.h
rename to src/Examples/inviscid-flow-sw/UpwindMomentumZ.h
diff --git a/examples/inviscid-flow-sw/euler.cpp b/src/Examples/inviscid-flow-sw/euler.cpp
similarity index 100%
rename from examples/inviscid-flow-sw/euler.cpp
rename to src/Examples/inviscid-flow-sw/euler.cpp
diff --git a/examples/inviscid-flow-sw/euler.cu b/src/Examples/inviscid-flow-sw/euler.cu
similarity index 100%
rename from examples/inviscid-flow-sw/euler.cu
rename to src/Examples/inviscid-flow-sw/euler.cu
diff --git a/examples/inviscid-flow-sw/euler.h b/src/Examples/inviscid-flow-sw/euler.h
similarity index 100%
rename from examples/inviscid-flow-sw/euler.h
rename to src/Examples/inviscid-flow-sw/euler.h
diff --git a/examples/inviscid-flow-sw/eulerBuildConfigTag.h b/src/Examples/inviscid-flow-sw/eulerBuildConfigTag.h
similarity index 100%
rename from examples/inviscid-flow-sw/eulerBuildConfigTag.h
rename to src/Examples/inviscid-flow-sw/eulerBuildConfigTag.h
diff --git a/examples/inviscid-flow-vl/eulerProblem.h b/src/Examples/inviscid-flow-sw/eulerProblem.h
similarity index 84%
rename from examples/inviscid-flow-vl/eulerProblem.h
rename to src/Examples/inviscid-flow-sw/eulerProblem.h
index 4ee775becb2032f7f2802bd721af62b56021bed3..9ff30a5d5ba66d9b3b4d6fbb81cc0c07aaca26c8 100644
--- a/examples/inviscid-flow-vl/eulerProblem.h
+++ b/src/Examples/inviscid-flow-sw/eulerProblem.h
@@ -48,12 +48,12 @@ class eulerProblem:
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      typedef SharedPointer< InviscidOperators > InviscidOperatorsPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< InviscidOperators > InviscidOperatorsPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
       static String getTypeStatic();
@@ -84,6 +84,9 @@ class eulerProblem:
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      // Placeholder: the body is only TNL_ASSERT( false, "TODO:Implement" ); neither time nor dofs is used.
+      void applyBoundaryConditions( const RealType& time,
+                                    DofVectorPointer& dofs ) { TNL_ASSERT( false, "TODO:Implement")};      
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/inviscid-flow-sw/eulerProblem_impl.h b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h
similarity index 100%
rename from examples/inviscid-flow-sw/eulerProblem_impl.h
rename to src/Examples/inviscid-flow-sw/eulerProblem_impl.h
diff --git a/examples/inviscid-flow-sw/eulerRhs.h b/src/Examples/inviscid-flow-sw/eulerRhs.h
similarity index 100%
rename from examples/inviscid-flow-sw/eulerRhs.h
rename to src/Examples/inviscid-flow-sw/eulerRhs.h
diff --git a/examples/inviscid-flow-sw/run-euler-sw b/src/Examples/inviscid-flow-sw/run-euler-sw
similarity index 100%
rename from examples/inviscid-flow-sw/run-euler-sw
rename to src/Examples/inviscid-flow-sw/run-euler-sw
diff --git a/examples/inviscid-flow-vl/CMakeLists.txt b/src/Examples/inviscid-flow-vl/CMakeLists.txt
similarity index 100%
rename from examples/inviscid-flow-vl/CMakeLists.txt
rename to src/Examples/inviscid-flow-vl/CMakeLists.txt
diff --git a/src/Examples/inviscid-flow-vl/CompressibleConservativeVariables.h b/src/Examples/inviscid-flow-vl/CompressibleConservativeVariables.h
new file mode 100644
index 0000000000000000000000000000000000000000..01e820686e98781a3267c4526e8e7c6449218415
--- /dev/null
+++ b/src/Examples/inviscid-flow-vl/CompressibleConservativeVariables.h
@@ -0,0 +1,147 @@
+/***************************************************************************
+                          CompressibleConservativeVariables.h  -  description
+                             -------------------
+    begin                : Feb 12, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+
+#pragma once
+
+#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/VectorField.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+namespace TNL {
+// Groups the conservative variables (density, momentum, energy) as shared pointers to mesh functions on Mesh.
+template< typename Mesh >
+class CompressibleConservativeVariables
+{
+   public:
+      typedef Mesh MeshType;
+      static const int Dimensions = MeshType::getMeshDimension();
+      typedef typename MeshType::RealType RealType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef typename MeshType::IndexType IndexType;
+      typedef Functions::MeshFunction< Mesh > MeshFunctionType;
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;      
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > MomentumFieldPointer;  // momentum reuses the vector-field type
+      // Default constructor: no mesh is supplied here; one can be given later through setMesh() or bind().
+      CompressibleConservativeVariables(){};
+      // Constructs density, momentum and energy from the given mesh pointer.
+      CompressibleConservativeVariables( const MeshPointer& meshPointer )
+      : density( meshPointer ),
+        momentum( meshPointer ),
+        //pressure( meshPointer ),
+        energy( meshPointer ){};
+      // Forwards the mesh pointer to density, momentum and energy.
+      void setMesh( const MeshPointer& meshPointer )
+      {
+         this->density->setMesh( meshPointer );
+         this->momentum->setMesh( meshPointer );
+         //this->pressure.setMesh( meshPointer );
+         this->energy->setMesh( meshPointer );
+      }
+      // Binds the fields to consecutive parts of data, starting at offset: density, each momentum component, energy.
+      template< typename Vector >
+      void bind( const MeshPointer& meshPointer,
+                 const Vector& data,
+                 IndexType offset = 0 )
+      {
+         IndexType currentOffset( offset );
+         this->density->bind( meshPointer, data, currentOffset );
+         currentOffset += this->density->getDofs( meshPointer );  // step past the density DOFs
+         for( IndexType i = 0; i < Dimensions; i++ )
+         {
+            ( *this->momentum )[ i ]->bind( meshPointer, data, currentOffset );
+            currentOffset += ( *this->momentum )[ i ]->getDofs( meshPointer );  // step past this component
+         }
+         this->energy->bind( meshPointer, data, currentOffset );
+      }
+      // Returns the sum of getDofs( meshPointer ) over density, momentum and energy.
+      IndexType getDofs( const MeshPointer& meshPointer ) const
+      {
+         return this->density->getDofs( meshPointer ) + 
+            this->momentum->getDofs( meshPointer ) +
+            this->energy->getDofs( meshPointer );
+      }
+      // Access to the density pointer (mutable and const overloads).
+      MeshFunctionPointer& getDensity()
+      {
+         return this->density;
+      }
+
+      const MeshFunctionPointer& getDensity() const
+      {
+         return this->density;
+      }
+      // Assigns the given pointer to the density member.
+      void setDensity( MeshFunctionPointer& density )
+      {
+         this->density = density;
+      }
+      // Access to the momentum field pointer (mutable and const overloads).
+      MomentumFieldPointer& getMomentum()
+      {
+         return this->momentum;
+      }
+      
+      const MomentumFieldPointer& getMomentum() const
+      {
+         return this->momentum;
+      }
+      // Assigns the given pointer to the momentum member.
+      void setMomentum( MomentumFieldPointer& momentum )
+      {
+         this->momentum = momentum;
+      }
+      // The pressure accessors below are disabled; this class declares no pressure member.
+      /*MeshFunctionPointer& getPressure()
+      {
+         return this->pressure;
+      }
+      
+      const MeshFunctionPointer& getPressure() const
+      {
+         return this->pressure;
+      }
+      
+      void setPressure( MeshFunctionPointer& pressure )
+      {
+         this->pressure = pressure;
+      }*/
+      // Access to the energy pointer (mutable and const overloads).
+      MeshFunctionPointer& getEnergy()
+      {
+         return this->energy;
+      }
+      
+      const MeshFunctionPointer& getEnergy() const
+      {
+         return this->energy;
+      }
+      // Assigns the given pointer to the energy member.
+      void setEnergy( MeshFunctionPointer& energy )
+      {
+         this->energy = energy;
+      }
+      // Empty stub: the body does nothing and velocityField is left untouched.
+      void getVelocityField( VelocityFieldType& velocityField )
+      {
+         
+      }
+
+   protected:
+      // Stored fields; handed out by reference through the getters above.
+      MeshFunctionPointer density;
+      MomentumFieldPointer momentum;
+      MeshFunctionPointer energy;
+      
+};
+
+} // namespace TNL
diff --git a/examples/flow-vl/LaxFridrichs.h b/src/Examples/inviscid-flow-vl/LaxFridrichs.h
similarity index 88%
rename from examples/flow-vl/LaxFridrichs.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichs.h
index cdf32899f69eb797a6d9a18a52b84c09709867bf..c56d20aed9947bd16796a5bf7ceb22adb237bf0c 100644
--- a/examples/flow-vl/LaxFridrichs.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichs.h
@@ -43,15 +43,15 @@ class LaxFridrichs
       typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/inviscid-flow-sw/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h
similarity index 99%
rename from examples/inviscid-flow-sw/LaxFridrichsContinuity.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h
index 45ad4d52b12d402365a40cac043d5525e230cecb..82747cd18220efc01bc2d68e0247c01723c29fd0 100644
--- a/examples/inviscid-flow-sw/LaxFridrichsContinuity.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h
@@ -34,7 +34,7 @@ class LaxFridrichsContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       LaxFridrichsContinuityBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-vl/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h
similarity index 98%
rename from examples/inviscid-flow-vl/LaxFridrichsEnergy.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h
index 18c824762b8c677253dbd4e494be7ad3aea7e769..03019ed23c85f82ee489c95d8173c0f100cff3c8 100644
--- a/examples/inviscid-flow-vl/LaxFridrichsEnergy.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h
@@ -30,8 +30,8 @@ class LaxFridrichsEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-sw/LaxFridrichsMomentumBase.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumBase.h
similarity index 92%
rename from examples/inviscid-flow-sw/LaxFridrichsMomentumBase.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichsMomentumBase.h
index 67dae9fdf8256cecf032a731dd5d616d715ca0fe..cc2561748968a2bd808fa434d4f3d87d41765f45 100644
--- a/examples/inviscid-flow-sw/LaxFridrichsMomentumBase.h
+++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumBase.h
@@ -28,8 +28,8 @@ class LaxFridrichsMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       LaxFridrichsMomentumBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-vl/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/inviscid-flow-vl/LaxFridrichsMomentumX.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h
diff --git a/examples/inviscid-flow-vl/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h
similarity index 100%
rename from examples/inviscid-flow-vl/LaxFridrichsMomentumY.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h
diff --git a/examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h
rename to src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h
diff --git a/src/Examples/inviscid-flow-vl/PhysicalVariablesGetter.h b/src/Examples/inviscid-flow-vl/PhysicalVariablesGetter.h
new file mode 100644
index 0000000000000000000000000000000000000000..2af0a02b820db183fa32669dcecfe63f7f006990
--- /dev/null
+++ b/src/Examples/inviscid-flow-vl/PhysicalVariablesGetter.h
@@ -0,0 +1,122 @@
+/***************************************************************************
+                          PhysicalVariablesGetter.h  -  description
+                             -------------------
+    begin                : Feb 12, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/VectorField.h>
+#include <TNL/Functions/MeshFunctionEvaluator.h>
+#include "CompressibleConservativeVariables.h"
+
+namespace TNL {
+// Computes velocity and pressure fields from conservative variables (density, momentum, energy) on Mesh.
+template< typename Mesh >
+class PhysicalVariablesGetter
+{
+   public:
+      
+      typedef Mesh MeshType;
+      typedef typename MeshType::RealType RealType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef typename MeshType::IndexType IndexType;
+      static const int Dimensions = MeshType::getMeshDimension();
+      
+      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      // Functor over mesh entities: one velocity component computed as momentum / density.
+      class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
+      {
+         public:
+            typedef typename MeshType::RealType RealType;
+            // Keeps copies of the density pointer and of one momentum-component pointer.
+            VelocityGetter( MeshFunctionPointer density, 
+                            MeshFunctionPointer momentum )
+            : density( density ), momentum( momentum ) {}
+            // Returns momentum / density at meshEntity, or 0 when density equals 0.0; time is not used.
+            template< typename EntityType >
+            __cuda_callable__
+            RealType operator()( const EntityType& meshEntity,
+                                        const RealType& time = 0.0 ) const
+            {
+               if( density.template getData< DeviceType >()( meshEntity ) == 0.0 )
+                  return 0;
+               else
+                  return momentum.template getData< DeviceType >()( meshEntity ) / 
+                         density.template getData< DeviceType >()( meshEntity );
+            }
+            
+         protected:
+            const MeshFunctionPointer density, momentum;
+      };
+      // Functor over mesh entities: pressure computed from density, energy, momentum and gamma.
+      class PressureGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
+      {
+         public:
+            typedef typename MeshType::RealType RealType;
+            // Keeps copies of the three field pointers and of the value of gamma.
+            PressureGetter( MeshFunctionPointer density,
+                            MeshFunctionPointer energy, 
+                            VelocityFieldPointer momentum,
+                            const RealType& gamma )
+            : density( density ), energy( energy ), momentum( momentum ), gamma( gamma ) {}
+            // Returns ( gamma - 1 ) * ( e - 0.5 * |m|^2 / rho ) at meshEntity, or 0 when rho equals 0.0; time is not used.
+            template< typename EntityType >
+            __cuda_callable__
+            RealType operator()( const EntityType& meshEntity,
+                                 const RealType& time = 0.0 ) const
+            {
+               const RealType e = energy.template getData< DeviceType >()( meshEntity );
+               const RealType rho = density.template getData< DeviceType >()( meshEntity );
+               const RealType momentumNorm = momentum.template getData< DeviceType >().getVector( meshEntity ).lpNorm( 2.0 );  // lpNorm with p = 2
+               if( rho == 0.0 )
+                  return 0;
+               else
+                  return ( gamma - 1.0 ) * ( e - 0.5 * momentumNorm * momentumNorm / rho );
+            }
+            
+         protected:
+            const MeshFunctionPointer density, energy;
+            const VelocityFieldPointer momentum;
+            const RealType gamma;
+      };      
+
+      // For each i < Dimensions, evaluates a VelocityGetter (density, i-th momentum component) into ( *velocity )[ i ].
+      void getVelocity( const ConservativeVariablesPointer& conservativeVariables,
+                        VelocityFieldPointer& velocity )
+      {
+         Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
+         for( int i = 0; i < Dimensions; i++ )
+         {
+            Pointers::SharedPointer< VelocityGetter, DeviceType > velocityGetter( conservativeVariables->getDensity(),
+                                                                        ( *conservativeVariables->getMomentum() )[ i ] );
+            evaluator.evaluate( ( *velocity )[ i ], velocityGetter );
+         }
+      }
+      // Evaluates a PressureGetter built from the conservative variables and gamma into pressure.
+      void getPressure( const ConservativeVariablesPointer& conservativeVariables,
+                        const RealType& gamma,
+                        MeshFunctionPointer& pressure )
+      {
+         Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
+         Pointers::SharedPointer< PressureGetter, DeviceType > pressureGetter( conservativeVariables->getDensity(),
+                                                                     conservativeVariables->getEnergy(),
+                                                                     conservativeVariables->getMomentum(),
+                                                                     gamma );
+         evaluator.evaluate( pressure, pressureGetter );
+      }
+      
+};
+   
+} //namespace TNL
diff --git a/examples/inviscid-flow-sw/RiemannProblemInitialCondition.h b/src/Examples/inviscid-flow-vl/RiemannProblemInitialCondition.h
similarity index 99%
rename from examples/inviscid-flow-sw/RiemannProblemInitialCondition.h
rename to src/Examples/inviscid-flow-vl/RiemannProblemInitialCondition.h
index 85fbc8af01994495e2c1f2f7d95f8155ee216684..b8e53d60178f44111018dcc67fe5e244eda942d5 100644
--- a/examples/inviscid-flow-sw/RiemannProblemInitialCondition.h
+++ b/src/Examples/inviscid-flow-vl/RiemannProblemInitialCondition.h
@@ -38,7 +38,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 1,MeshReal, Device, Me
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -195,7 +195,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 2, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -415,7 +415,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 3, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -768,7 +768,7 @@ class RiemannProblemInitialCondition
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
       
       RiemannProblemInitialCondition()
@@ -1342,7 +1342,7 @@ class RiemannProblemInitialCondition
          typedef Functions::Analytic::VectorNorm< Dimensions, RealType > VectorNormType;
          typedef Operators::Analytic::Sign< Dimensions, RealType > SignType;
          typedef Functions::OperatorFunction< SignType, VectorNormType > InitialConditionType;
-         typedef SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
+         typedef Pointers::SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
          
          InitialConditionPointer initialCondition;
          initialCondition->getFunction().setCenter( center );
diff --git a/examples/inviscid-flow-vl/Upwind.h b/src/Examples/inviscid-flow-vl/Upwind.h
similarity index 88%
rename from examples/inviscid-flow-vl/Upwind.h
rename to src/Examples/inviscid-flow-vl/Upwind.h
index 263da044a2edaca855b6c6f3fd050bd10cc7c689..529e0aadf7e68c86c97393e122927142014c8c45 100644
--- a/examples/inviscid-flow-vl/Upwind.h
+++ b/src/Examples/inviscid-flow-vl/Upwind.h
@@ -43,15 +43,15 @@ class Upwind
       typedef UpwindMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
       typedef UpwindEnergy< Mesh, Real, Index > EnergyOperatorType;
 
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VectorFieldType > VectorFieldPointer;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       
-      typedef SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
-      typedef SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
-      typedef SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
-      typedef SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
-      typedef SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
+      typedef Pointers::SharedPointer< ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer< MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer< MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer< EnergyOperatorType > EnergyOperatorPointer;
 
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/examples/inviscid-flow-vl/UpwindContinuity.h b/src/Examples/inviscid-flow-vl/UpwindContinuity.h
similarity index 98%
rename from examples/inviscid-flow-vl/UpwindContinuity.h
rename to src/Examples/inviscid-flow-vl/UpwindContinuity.h
index 144e04f5017c7c7ac39a1886374232d7e6fb511f..4a21cd502b3f54f898a25ef85be84dcf52a52f5d 100644
--- a/examples/inviscid-flow-vl/UpwindContinuity.h
+++ b/src/Examples/inviscid-flow-vl/UpwindContinuity.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -34,8 +34,8 @@ class UpwindContinuityBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
 
       static String getType()
       {
diff --git a/examples/inviscid-flow-vl/UpwindEnergy.h b/src/Examples/inviscid-flow-vl/UpwindEnergy.h
similarity index 99%
rename from examples/inviscid-flow-vl/UpwindEnergy.h
rename to src/Examples/inviscid-flow-vl/UpwindEnergy.h
index d9ddc728eba03ad0daaf8b4424a6b2d2c5f0efbc..e3857cbcdf2e29110d01e3f6140a1f7da6b9f0b6 100644
--- a/examples/inviscid-flow-vl/UpwindEnergy.h
+++ b/src/Examples/inviscid-flow-vl/UpwindEnergy.h
@@ -30,8 +30,8 @@ class UpwindEnergyBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
       UpwindEnergyBase()
        : artificialViscosity( 1.0 ){};
diff --git a/examples/inviscid-flow-vl/UpwindMomentumBase.h b/src/Examples/inviscid-flow-vl/UpwindMomentumBase.h
similarity index 96%
rename from examples/inviscid-flow-vl/UpwindMomentumBase.h
rename to src/Examples/inviscid-flow-vl/UpwindMomentumBase.h
index 209a58ac6deb3711a45eefee1b95d85822c5cc8d..93b470a2822c6e4d33b700a41eadf4953ec1d01c 100644
--- a/examples/inviscid-flow-vl/UpwindMomentumBase.h
+++ b/src/Examples/inviscid-flow-vl/UpwindMomentumBase.h
@@ -28,8 +28,8 @@ class UpwindMomentumBase
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
       
 
       void setTau(const Real& tau)
diff --git a/examples/inviscid-flow-vl/UpwindMomentumX.h b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h
similarity index 100%
rename from examples/inviscid-flow-vl/UpwindMomentumX.h
rename to src/Examples/inviscid-flow-vl/UpwindMomentumX.h
diff --git a/examples/inviscid-flow-vl/UpwindMomentumY.h b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h
similarity index 100%
rename from examples/inviscid-flow-vl/UpwindMomentumY.h
rename to src/Examples/inviscid-flow-vl/UpwindMomentumY.h
diff --git a/examples/inviscid-flow-vl/UpwindMomentumZ.h b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h
similarity index 100%
rename from examples/inviscid-flow-vl/UpwindMomentumZ.h
rename to src/Examples/inviscid-flow-vl/UpwindMomentumZ.h
diff --git a/examples/inviscid-flow-vl/euler.cpp b/src/Examples/inviscid-flow-vl/euler.cpp
similarity index 100%
rename from examples/inviscid-flow-vl/euler.cpp
rename to src/Examples/inviscid-flow-vl/euler.cpp
diff --git a/examples/inviscid-flow-vl/euler.cu b/src/Examples/inviscid-flow-vl/euler.cu
similarity index 100%
rename from examples/inviscid-flow-vl/euler.cu
rename to src/Examples/inviscid-flow-vl/euler.cu
diff --git a/examples/inviscid-flow-vl/euler.h b/src/Examples/inviscid-flow-vl/euler.h
similarity index 100%
rename from examples/inviscid-flow-vl/euler.h
rename to src/Examples/inviscid-flow-vl/euler.h
diff --git a/examples/inviscid-flow-vl/eulerBuildConfigTag.h b/src/Examples/inviscid-flow-vl/eulerBuildConfigTag.h
similarity index 100%
rename from examples/inviscid-flow-vl/eulerBuildConfigTag.h
rename to src/Examples/inviscid-flow-vl/eulerBuildConfigTag.h
diff --git a/examples/inviscid-flow-sw/eulerProblem.h b/src/Examples/inviscid-flow-vl/eulerProblem.h
similarity index 84%
rename from examples/inviscid-flow-sw/eulerProblem.h
rename to src/Examples/inviscid-flow-vl/eulerProblem.h
index 4ee775becb2032f7f2802bd721af62b56021bed3..9ff30a5d5ba66d9b3b4d6fbb81cc0c07aaca26c8 100644
--- a/examples/inviscid-flow-sw/eulerProblem.h
+++ b/src/Examples/inviscid-flow-vl/eulerProblem.h
@@ -48,12 +48,12 @@ class eulerProblem:
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      typedef SharedPointer< InviscidOperators > InviscidOperatorsPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< InviscidOperators > InviscidOperatorsPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       using CommunicatorType = Communicator;
 
       static String getTypeStatic();
@@ -84,6 +84,9 @@ class eulerProblem:
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      void applyBoundaryConditions( const RealType& time,   // NOTE(review): unimplemented stub; neither time nor dofs is used
+                                    DofVectorPointer& dofs ) { TNL_ASSERT( false, "TODO:Implement")};      // presumably fails only when assertions are enabled; verify TNL_ASSERT semantics
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/inviscid-flow-vl/eulerProblem_impl.h b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h
similarity index 100%
rename from examples/inviscid-flow-vl/eulerProblem_impl.h
rename to src/Examples/inviscid-flow-vl/eulerProblem_impl.h
diff --git a/examples/inviscid-flow-vl/eulerRhs.h b/src/Examples/inviscid-flow-vl/eulerRhs.h
similarity index 100%
rename from examples/inviscid-flow-vl/eulerRhs.h
rename to src/Examples/inviscid-flow-vl/eulerRhs.h
diff --git a/examples/inviscid-flow-vl/run-euler-vl b/src/Examples/inviscid-flow-vl/run-euler-vl
similarity index 100%
rename from examples/inviscid-flow-vl/run-euler-vl
rename to src/Examples/inviscid-flow-vl/run-euler-vl
diff --git a/examples/inviscid-flow/1d/MyMixedBoundaryConditions.h b/src/Examples/inviscid-flow/1d/MyMixedBoundaryConditions.h
similarity index 98%
rename from examples/inviscid-flow/1d/MyMixedBoundaryConditions.h
rename to src/Examples/inviscid-flow/1d/MyMixedBoundaryConditions.h
index 006a33e53f772dc961c11ad2a5d2d077ee80020e..218805fa13a57b66acc38d4d264a196b001fb3f7 100644
--- a/examples/inviscid-flow/1d/MyMixedBoundaryConditions.h
+++ b/src/Examples/inviscid-flow/1d/MyMixedBoundaryConditions.h
@@ -50,7 +50,7 @@ class MyMixedBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/inviscid-flow/1d/MyNeumannBoundaryConditions.h b/src/Examples/inviscid-flow/1d/MyNeumannBoundaryConditions.h
similarity index 98%
rename from examples/inviscid-flow/1d/MyNeumannBoundaryConditions.h
rename to src/Examples/inviscid-flow/1d/MyNeumannBoundaryConditions.h
index cf9b29dcc19b6a781d00e66f1a97b5ce8aa738bb..204be5a3fd4a85dae693fa97354fd2f02e429660 100644
--- a/examples/inviscid-flow/1d/MyNeumannBoundaryConditions.h
+++ b/src/Examples/inviscid-flow/1d/MyNeumannBoundaryConditions.h
@@ -50,7 +50,7 @@ class MyNeumannBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/inviscid-flow/1d/tnl-run-euler-1d b/src/Examples/inviscid-flow/1d/tnl-run-euler-1d
similarity index 100%
rename from examples/inviscid-flow/1d/tnl-run-euler-1d
rename to src/Examples/inviscid-flow/1d/tnl-run-euler-1d
diff --git a/examples/inviscid-flow/2d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h
similarity index 100%
rename from examples/inviscid-flow/2d/Euler2DVelXGetter.h
rename to src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h
diff --git a/examples/inviscid-flow/3d/MyMixedBoundaryConditions.h b/src/Examples/inviscid-flow/2d/MyMixedBoundaryConditions.h
similarity index 99%
rename from examples/inviscid-flow/3d/MyMixedBoundaryConditions.h
rename to src/Examples/inviscid-flow/2d/MyMixedBoundaryConditions.h
index 066600f483d7e7bcf2adbb0cb6c68ad73eda52a6..dbcd6b5d21834919a5d2ef25245dd82786d656e2 100644
--- a/examples/inviscid-flow/3d/MyMixedBoundaryConditions.h
+++ b/src/Examples/inviscid-flow/2d/MyMixedBoundaryConditions.h
@@ -50,7 +50,7 @@ class MyMixedBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/inviscid-flow/2d/MyNeumannBoundaryConditions.h b/src/Examples/inviscid-flow/2d/MyNeumannBoundaryConditions.h
similarity index 99%
rename from examples/inviscid-flow/2d/MyNeumannBoundaryConditions.h
rename to src/Examples/inviscid-flow/2d/MyNeumannBoundaryConditions.h
index 72285a4ae0b46ace0750012d0923500e9b61829b..1db632bf5a7a1c1519f5b6c31a16719139bdd555 100644
--- a/examples/inviscid-flow/2d/MyNeumannBoundaryConditions.h
+++ b/src/Examples/inviscid-flow/2d/MyNeumannBoundaryConditions.h
@@ -50,7 +50,7 @@ class MyNeumannBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/inviscid-flow/2d/tnl-run-euler-2d b/src/Examples/inviscid-flow/2d/tnl-run-euler-2d
similarity index 100%
rename from examples/inviscid-flow/2d/tnl-run-euler-2d
rename to src/Examples/inviscid-flow/2d/tnl-run-euler-2d
diff --git a/examples/inviscid-flow/3d/CMakeLists.txt b/src/Examples/inviscid-flow/3d/CMakeLists.txt
similarity index 100%
rename from examples/inviscid-flow/3d/CMakeLists.txt
rename to src/Examples/inviscid-flow/3d/CMakeLists.txt
diff --git a/examples/inviscid-flow/3d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h
similarity index 100%
rename from examples/inviscid-flow/3d/Euler2DVelXGetter.h
rename to src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h
diff --git a/examples/inviscid-flow/3d/EulerPressureGetter.h b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h
similarity index 100%
rename from examples/inviscid-flow/3d/EulerPressureGetter.h
rename to src/Examples/inviscid-flow/3d/EulerPressureGetter.h
diff --git a/examples/inviscid-flow/3d/EulerVelGetter.h b/src/Examples/inviscid-flow/3d/EulerVelGetter.h
similarity index 100%
rename from examples/inviscid-flow/3d/EulerVelGetter.h
rename to src/Examples/inviscid-flow/3d/EulerVelGetter.h
diff --git a/examples/inviscid-flow/3d/LaxFridrichs3D.h b/src/Examples/inviscid-flow/3d/LaxFridrichs3D.h
similarity index 100%
rename from examples/inviscid-flow/3d/LaxFridrichs3D.h
rename to src/Examples/inviscid-flow/3d/LaxFridrichs3D.h
diff --git a/examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h
similarity index 100%
rename from examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h
rename to src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h
diff --git a/examples/inviscid-flow/3d/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h
similarity index 100%
rename from examples/inviscid-flow/3d/LaxFridrichsEnergy.h
rename to src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h
diff --git a/examples/inviscid-flow/3d/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/inviscid-flow/3d/LaxFridrichsMomentumX.h
rename to src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h
diff --git a/examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h
rename to src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h
diff --git a/examples/inviscid-flow/2d/MyMixedBoundaryConditions.h b/src/Examples/inviscid-flow/3d/MyMixedBoundaryConditions.h
similarity index 99%
rename from examples/inviscid-flow/2d/MyMixedBoundaryConditions.h
rename to src/Examples/inviscid-flow/3d/MyMixedBoundaryConditions.h
index 066600f483d7e7bcf2adbb0cb6c68ad73eda52a6..dbcd6b5d21834919a5d2ef25245dd82786d656e2 100644
--- a/examples/inviscid-flow/2d/MyMixedBoundaryConditions.h
+++ b/src/Examples/inviscid-flow/3d/MyMixedBoundaryConditions.h
@@ -50,7 +50,7 @@ class MyMixedBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/inviscid-flow/3d/MyNeumannBoundaryConditions.h b/src/Examples/inviscid-flow/3d/MyNeumannBoundaryConditions.h
similarity index 99%
rename from examples/inviscid-flow/3d/MyNeumannBoundaryConditions.h
rename to src/Examples/inviscid-flow/3d/MyNeumannBoundaryConditions.h
index 7987b93160e73f415b86f77882ecb2bc445efaa4..d56b8210f14a40ea69cf92bd3bf19287be61d7d9 100644
--- a/examples/inviscid-flow/3d/MyNeumannBoundaryConditions.h
+++ b/src/Examples/inviscid-flow/3d/MyNeumannBoundaryConditions.h
@@ -50,7 +50,7 @@ class MyNeumannBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer< Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::VertexType VertexType;
 
diff --git a/examples/inviscid-flow/3d/euler-cuda.cu b/src/Examples/inviscid-flow/3d/euler-cuda.cu
similarity index 100%
rename from examples/inviscid-flow/3d/euler-cuda.cu
rename to src/Examples/inviscid-flow/3d/euler-cuda.cu
diff --git a/examples/inviscid-flow/3d/euler.cpp b/src/Examples/inviscid-flow/3d/euler.cpp
similarity index 100%
rename from examples/inviscid-flow/3d/euler.cpp
rename to src/Examples/inviscid-flow/3d/euler.cpp
diff --git a/examples/inviscid-flow/3d/euler.h b/src/Examples/inviscid-flow/3d/euler.h
similarity index 100%
rename from examples/inviscid-flow/3d/euler.h
rename to src/Examples/inviscid-flow/3d/euler.h
diff --git a/examples/inviscid-flow/3d/eulerBuildConfigTag.h b/src/Examples/inviscid-flow/3d/eulerBuildConfigTag.h
similarity index 100%
rename from examples/inviscid-flow/3d/eulerBuildConfigTag.h
rename to src/Examples/inviscid-flow/3d/eulerBuildConfigTag.h
diff --git a/examples/inviscid-flow/3d/eulerProblem.h b/src/Examples/inviscid-flow/3d/eulerProblem.h
similarity index 92%
rename from examples/inviscid-flow/3d/eulerProblem.h
rename to src/Examples/inviscid-flow/3d/eulerProblem.h
index 6869dcb525083d855a32d681ed3754d05e0cd930..d2ff1fc109c6d548ac503d82e398aa7c5b19c066 100644
--- a/examples/inviscid-flow/3d/eulerProblem.h
+++ b/src/Examples/inviscid-flow/3d/eulerProblem.h
@@ -23,10 +23,10 @@ class eulerProblem:
       typedef typename Mesh::DeviceType DeviceType;
       typedef typename DifferentialOperator::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
       typedef PDEProblem< Mesh, RealType, DeviceType, IndexType > BaseType;      
       
       using typename BaseType::MeshType;
diff --git a/examples/inviscid-flow/3d/eulerProblem_impl.h b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h
similarity index 100%
rename from examples/inviscid-flow/3d/eulerProblem_impl.h
rename to src/Examples/inviscid-flow/3d/eulerProblem_impl.h
diff --git a/examples/inviscid-flow/3d/eulerRhs.h b/src/Examples/inviscid-flow/3d/eulerRhs.h
similarity index 100%
rename from examples/inviscid-flow/3d/eulerRhs.h
rename to src/Examples/inviscid-flow/3d/eulerRhs.h
diff --git a/examples/inviscid-flow/3d/run-euler b/src/Examples/inviscid-flow/3d/run-euler
similarity index 100%
rename from examples/inviscid-flow/3d/run-euler
rename to src/Examples/inviscid-flow/3d/run-euler
diff --git a/examples/inviscid-flow/3d/tnl-run-euler-2d b/src/Examples/inviscid-flow/3d/tnl-run-euler-2d
similarity index 100%
rename from examples/inviscid-flow/3d/tnl-run-euler-2d
rename to src/Examples/inviscid-flow/3d/tnl-run-euler-2d
diff --git a/examples/inviscid-flow/CMakeLists.txt b/src/Examples/inviscid-flow/CMakeLists.txt
similarity index 100%
rename from examples/inviscid-flow/CMakeLists.txt
rename to src/Examples/inviscid-flow/CMakeLists.txt
diff --git a/examples/flow/CompressibleConservativeVariables.h b/src/Examples/inviscid-flow/CompressibleConservativeVariables.h
similarity index 93%
rename from examples/flow/CompressibleConservativeVariables.h
rename to src/Examples/inviscid-flow/CompressibleConservativeVariables.h
index a3afc845366f8df17b41c5affc5a4e49d5da052a..ca77bb281ffef853a96172e9da57749247b4a456 100644
--- a/examples/flow/CompressibleConservativeVariables.h
+++ b/src/Examples/inviscid-flow/CompressibleConservativeVariables.h
@@ -13,7 +13,7 @@
 
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 
@@ -28,9 +28,9 @@ class CompressibleConservativeVariables
       typedef typename MeshType::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshType > MeshPointer;      
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
-      typedef SharedPointer< VelocityFieldType > MomentumFieldPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;      
+      typedef Pointers::SharedPointer<  MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType > MomentumFieldPointer;
       
       CompressibleConservativeVariables(){};
       
diff --git a/src/Examples/inviscid-flow/LaxFridrichs.h b/src/Examples/inviscid-flow/LaxFridrichs.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a648234a89be3f4e391cce6d7317a64a3114b33
--- /dev/null
+++ b/src/Examples/inviscid-flow/LaxFridrichs.h
@@ -0,0 +1,141 @@
+/***************************************************************************
+                          LaxFridrichs.h  -  description
+                             -------------------
+    begin                : Feb 18, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Meshes/Grid.h>
+#include <TNL/Functions/VectorField.h>
+
+#include "LaxFridrichsContinuity.h"
+#include "LaxFridrichsEnergy.h"
+#include "LaxFridrichsMomentumX.h"
+#include "LaxFridrichsMomentumY.h"
+#include "LaxFridrichsMomentumZ.h"
+
+namespace TNL {
+
+template< typename Mesh,
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::IndexType >
+class LaxFridrichs   // Groups the Lax-Fridrichs continuity, momentum (X, Y, Z) and energy operators
+{                    // and forwards common settings (viscosity, tau, pressure, velocity) to them.
+   public:
+      typedef Mesh MeshType;
+      typedef Real RealType;
+      typedef typename Mesh::DeviceType DeviceType;
+      typedef Index IndexType;
+      typedef Functions::MeshFunction< Mesh > MeshFunctionType;
+      static const int Dimensions = Mesh::getMeshDimension();
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VectorFieldType;
+      // Types of the five operators grouped by this class.
+      typedef LaxFridrichsContinuity< Mesh, Real, Index > ContinuityOperatorType;
+      typedef LaxFridrichsMomentumX< Mesh, Real, Index > MomentumXOperatorType;
+      typedef LaxFridrichsMomentumY< Mesh, Real, Index > MomentumYOperatorType;
+      typedef LaxFridrichsMomentumZ< Mesh, Real, Index > MomentumZOperatorType;
+      typedef LaxFridrichsEnergy< Mesh, Real, Index > EnergyOperatorType;
+      // Shared-pointer types for the mesh function, the vector field and the mesh.
+      typedef Pointers::SharedPointer<  MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  VectorFieldType > VectorFieldPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
+      // Shared-pointer types for the individual operators.
+      typedef Pointers::SharedPointer<  ContinuityOperatorType > ContinuityOperatorPointer;
+      typedef Pointers::SharedPointer<  MomentumXOperatorType > MomentumXOperatorPointer;
+      typedef Pointers::SharedPointer<  MomentumYOperatorType > MomentumYOperatorPointer;      
+      typedef Pointers::SharedPointer<  MomentumZOperatorType > MomentumZOperatorPointer;      
+      typedef Pointers::SharedPointer<  EnergyOperatorType > EnergyOperatorPointer;
+      // Registers the "numerical-viscosity" entry (double, default 1.0) under the given prefix.
+      static void configSetup( Config::ConfigDescription& config,
+                               const String& prefix = "" )
+      {
+         config.addEntry< double >( prefix + "numerical-viscosity", "Value of artificial (numerical) viscosity in the Lax-Fridrichs scheme", 1.0 );
+      }
+      // Starts with artificial viscosity 1.0; setup() replaces it with the configured value.
+      LaxFridrichs()
+         : artificialViscosity( 1.0 ) {}
+      // Reads "numerical-viscosity" and passes it to all five operators; meshPointer is not used; always returns true.
+      bool setup( const MeshPointer& meshPointer,
+                  const Config::ParameterContainer& parameters,
+                  const String& prefix = "" )
+      {
+         this->artificialViscosity = parameters.getParameter< double >( prefix + "numerical-viscosity" );
+         this->continuityOperatorPointer->setArtificialViscosity( artificialViscosity );
+         this->momentumXOperatorPointer->setArtificialViscosity( artificialViscosity );
+         this->momentumYOperatorPointer->setArtificialViscosity( artificialViscosity );
+         this->momentumZOperatorPointer->setArtificialViscosity( artificialViscosity );
+         this->energyOperatorPointer->setArtificialViscosity( artificialViscosity );
+         
+         return true;
+      }
+      // Forwards tau to all five operators.
+      void setTau( const RealType& tau )
+      {
+         this->continuityOperatorPointer->setTau( tau );
+         this->momentumXOperatorPointer->setTau( tau );
+         this->momentumYOperatorPointer->setTau( tau );
+         this->momentumZOperatorPointer->setTau( tau );
+         this->energyOperatorPointer->setTau( tau );
+      }
+      // Forwards the pressure to the momentum and energy operators; the continuity operator is not given one.
+      void setPressure( const MeshFunctionPointer& pressure )
+      {
+         this->momentumXOperatorPointer->setPressure( pressure );
+         this->momentumYOperatorPointer->setPressure( pressure );
+         this->momentumZOperatorPointer->setPressure( pressure );
+         this->energyOperatorPointer->setPressure( pressure );
+      }
+      // Forwards the velocity field to all five operators.
+      void setVelocity( const VectorFieldPointer& velocity )
+      {
+         this->continuityOperatorPointer->setVelocity( velocity );
+         this->momentumXOperatorPointer->setVelocity( velocity );
+         this->momentumYOperatorPointer->setVelocity( velocity );
+         this->momentumZOperatorPointer->setVelocity( velocity );
+         this->energyOperatorPointer->setVelocity( velocity );
+      }
+      // Read-only accessors returning the stored operator pointers.
+      const ContinuityOperatorPointer& getContinuityOperator() const
+      {
+         return this->continuityOperatorPointer;
+      }
+      
+      const MomentumXOperatorPointer& getMomentumXOperator() const
+      {
+         return this->momentumXOperatorPointer;
+      }
+
+      const MomentumYOperatorPointer& getMomentumYOperator() const
+      {
+         return this->momentumYOperatorPointer;
+      }
+      
+      const MomentumZOperatorPointer& getMomentumZOperator() const
+      {
+         return this->momentumZOperatorPointer;
+      }
+      
+      const EnergyOperatorPointer& getEnergyOperator() const
+      {
+         return this->energyOperatorPointer;
+      }
+
+   protected:
+      // One shared pointer per operator.
+      ContinuityOperatorPointer continuityOperatorPointer;
+      MomentumXOperatorPointer momentumXOperatorPointer;
+      MomentumYOperatorPointer momentumYOperatorPointer;
+      MomentumZOperatorPointer momentumZOperatorPointer;
+      EnergyOperatorPointer energyOperatorPointer;  
+      // Value last assigned by the constructor or by setup().
+      RealType artificialViscosity;
+};
+
+} //namespace TNL
diff --git a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h
new file mode 100644
index 0000000000000000000000000000000000000000..0ae10b4f9399fc41c97d5ad35fba4748f98bef95
--- /dev/null
+++ b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h
@@ -0,0 +1,288 @@
+/***************************************************************************
+                          LaxFridrichsContinuity.h  -  description
+                             -------------------
+    begin                : Feb 17, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Meshes/Grid.h>
+#include <TNL/Functions/VectorField.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+namespace TNL {
+
+   
+template< typename Mesh,
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::IndexType >
+class LaxFridrichsContinuityBase
+{
+   public:
+      // Common state (tau, velocity field, artificial viscosity) and setters shared by the LaxFridrichsContinuity specializations.
+      typedef Real RealType;
+      typedef Index IndexType;
+      typedef Mesh MeshType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef typename MeshType::CoordinatesType CoordinatesType;
+      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
+      static const int Dimensions = MeshType::getMeshDimension();
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
+      typedef Pointers::SharedPointer<  VelocityFieldType > VelocityFieldPointer;
+      // NOTE(review): only artificialViscosity is initialized here; tau has no initial value until setTau() is called.
+      LaxFridrichsContinuityBase()
+       : artificialViscosity( 1.0 ){};
+      // Builds the string "LaxFridrichsContinuity< <mesh type>, <Real type>, <Index type> >".
+      static String getType()
+      {
+         return String( "LaxFridrichsContinuity< " ) +
+             MeshType::getType() + ", " +
+             TNL::getType< Real >() + ", " +
+             TNL::getType< Index >() + " >"; 
+      }
+      // Stores tau, which the derived operators read as this->tau.
+      void setTau(const Real& tau)
+      {
+          this->tau = tau;
+      };
+      // Stores the velocity field pointer read by the derived operators.
+      void setVelocity( const VelocityFieldPointer& velocity )
+      {
+          this->velocity = velocity;
+      };
+      // Stores the artificial viscosity coefficient.
+      void setArtificialViscosity( const RealType& artificialViscosity )
+      {
+         this->artificialViscosity = artificialViscosity;
+      }
+
+
+      protected:
+         // Set by setTau(); not initialized by the constructor.
+         RealType tau;
+         // Set by setVelocity().
+         VelocityFieldPointer velocity;
+         // Initialized to 1.0; changed by setArtificialViscosity().
+         RealType artificialViscosity;
+};
+
+   
+template< typename Mesh,
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::IndexType >
+class LaxFridrichsContinuity
+{  // Empty primary template; the Meshes::Grid specializations provide the implementation.
+};
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class LaxFridrichsContinuity< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
+   : public LaxFridrichsContinuityBase< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
+{  // Specialization of the continuity operator for 1D grids.
+   public:
+      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
+      typedef LaxFridrichsContinuityBase< MeshType, Real, Index > BaseType;
+      // Bring the base-class types into this scope.
+      using typename BaseType::RealType;
+      using typename BaseType::IndexType;
+      using typename BaseType::DeviceType;
+      using typename BaseType::CoordinatesType;
+      using typename BaseType::MeshFunctionType;
+      using typename BaseType::VelocityFieldType;
+      using typename BaseType::VelocityFieldPointer;
+      using BaseType::Dimensions;
+      // Evaluates the operator for u at the given entity using its west and east neighbors; the time argument is not used.
+      template< typename MeshFunction, typename MeshEntity >
+      __cuda_callable__
+      Real operator()( const MeshFunction& u,
+                       const MeshEntity& entity,
+                       const RealType& time = 0.0 ) const
+      {
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
+
+         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
+         const IndexType& center = entity.getIndex(); 
+         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
+         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
+         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
+         return 1.0 / ( 2.0 * this->tau ) * this->artificialViscosity * ( u[ west ] - 2.0 * u[ center ]  + u[ east ] )  // second difference of u scaled by artificialViscosity / ( 2 * tau )
+               - 0.5 * ( u[ east ] * velocity_x_east - u[ west ] * velocity_x_west ) * hxInverse;  // minus half the east-west difference of u * velocity_x, times hxInverse
+      }
+      // The linear-system declarations below are commented out.
+      /*template< typename MeshEntity >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const MeshEntity& entity ) const;
+
+      template< typename MeshEntity, typename Vector, typename MatrixRow >
+      __cuda_callable__
+      void updateLinearSystem( const RealType& time,
+                               const RealType& tau,
+                               const MeshType& mesh,
+                               const IndexType& index,
+                               const MeshEntity& entity,
+                               const MeshFunctionType& u,
+                               Vector& b,
+                               MatrixRow& matrixRow ) const;*/
+};
+
+// Partial specialization of LaxFridrichsContinuity for 2D grids
+// ( Meshes::Grid< 2, ... > ).
+//
+// operator() evaluates, for a single cell, an artificial-viscosity term built
+// from the four axis-aligned neighbours minus centered differences of
+// u * velocity along x and y. tau, velocity and artificialViscosity are read
+// through this-> and are not declared in this class (presumably they come from
+// LaxFridrichsContinuityBase — confirm there).
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class LaxFridrichsContinuity< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
+   : public LaxFridrichsContinuityBase< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
+{
+   public:
+      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
+      typedef LaxFridrichsContinuityBase< MeshType, Real, Index > BaseType;
+      
+      // Re-export the base-class names so they can be used unqualified here.
+      using typename BaseType::RealType;
+      using typename BaseType::IndexType;
+      using typename BaseType::DeviceType;
+      using typename BaseType::CoordinatesType;
+      using typename BaseType::MeshFunctionType;
+      using typename BaseType::VelocityFieldType;
+      using typename BaseType::VelocityFieldPointer;
+      using BaseType::Dimensions;      
+
+      // Evaluates the operator at one grid cell.
+      //
+      //   u      - mesh function indexed by cell index (tagged "rho" below,
+      //            so presumably the density — TODO confirm)
+      //   entity - cell at which the operator is evaluated; its neighbour
+      //            entities provide the stencil indices
+      //   time   - not used by this implementation
+      //
+      // Returns the viscosity term minus half of the summed x and y
+      // differences of u * velocity.
+      // NOTE(review): no boundary handling is visible here; presumably this is
+      // only called for cells that have all four neighbours — confirm with caller.
+      template< typename MeshFunction, typename MeshEntity >
+      __cuda_callable__
+      Real operator()( const MeshFunction& u,
+                       const MeshEntity& entity,
+                       const RealType& time = 0.0 ) const
+      {
+         // Both the entity and the entities u is defined on must have dimension 2.
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
+
+         // rho: u is presumably the density — TODO confirm
+         // Space-step products with exponents ( -1, 0 ) and ( 0, -1 ); by their
+         // names these are 1/hx and 1/hy.
+         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
+         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
+         // Indices of the cell itself and of its neighbours at offsets
+         // ( +1, 0 ), ( -1, 0 ), ( 0, +1 ) and ( 0, -1 ).
+         const IndexType& center = entity.getIndex(); 
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
+         // Velocity component 0 sampled at west/east, component 1 at north/south.
+         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
+         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
+         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
+         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];
+         
+         // First term: ( sum of the four neighbours - 4 * center ) scaled by
+         // artificialViscosity / ( 4 * tau ).
+         // Second term: half of the east-west difference of u * velocity_x times
+         // hxInverse plus the north-south difference of u * velocity_y times hyInverse.
+         return 1.0 / ( 4.0 * this->tau ) * this->artificialViscosity * ( u[ west ] + u[ east ] + u[ south ] + u[ north ] - 4.0 * u[ center ] ) 
+                       - 0.5 * ( ( u[ east ] * velocity_x_east - u[ west ] * velocity_x_west ) * hxInverse
+                               + ( u[ north ] * velocity_y_north - u[ south ] * velocity_y_south ) * hyInverse );
+      }
+
+      // The linear-system interface below is commented out, i.e. currently disabled.
+      /*template< typename MeshEntity >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const MeshEntity& entity ) const;
+
+      template< typename MeshEntity, typename Vector, typename MatrixRow >
+      __cuda_callable__
+      void updateLinearSystem( const RealType& time,
+                               const RealType& tau,
+                               const MeshType& mesh,
+                               const IndexType& index,
+                               const MeshEntity& entity,
+                               const MeshFunctionType& u,
+                               Vector& b,
+                               MatrixRow& matrixRow ) const;*/
+};
+
+// Partial specialization of LaxFridrichsContinuity for 3D grids
+// ( Meshes::Grid< 3, ... > ).
+//
+// operator() evaluates, for a single cell, an artificial-viscosity term built
+// from the six axis-aligned neighbours minus centered differences of
+// u * velocity along x, y and z. tau, velocity and artificialViscosity are
+// read through this-> and are not declared in this class (presumably they come
+// from LaxFridrichsContinuityBase — confirm there).
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class LaxFridrichsContinuity< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
+   : public LaxFridrichsContinuityBase< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
+{
+   public:
+      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
+      typedef LaxFridrichsContinuityBase< MeshType, Real, Index > BaseType;
+      
+      // Re-export the base-class names so they can be used unqualified here.
+      using typename BaseType::RealType;
+      using typename BaseType::IndexType;
+      using typename BaseType::DeviceType;
+      using typename BaseType::CoordinatesType;
+      using typename BaseType::MeshFunctionType;
+      using typename BaseType::VelocityFieldType;
+      using typename BaseType::VelocityFieldPointer;
+      using BaseType::Dimensions;
+
+      // Evaluates the operator at one grid cell.
+      //
+      //   u      - mesh function indexed by cell index (tagged "rho" below,
+      //            so presumably the density — TODO confirm)
+      //   entity - cell at which the operator is evaluated; its neighbour
+      //            entities provide the stencil indices
+      //   time   - not used by this implementation
+      //
+      // Returns the viscosity term minus half of the summed x, y and z
+      // differences of u * velocity.
+      // NOTE(review): no boundary handling is visible here; presumably this is
+      // only called for cells that have all six neighbours — confirm with caller.
+      template< typename MeshFunction, typename MeshEntity >
+      __cuda_callable__
+      Real operator()( const MeshFunction& u,
+                       const MeshEntity& entity,
+                       const RealType& time = 0.0 ) const
+      {
+         // Both the entity and the entities u is defined on must have dimension 3.
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
+
+         // rho: u is presumably the density — TODO confirm
+         // Space-step products with a single -1 exponent per axis; by their
+         // names these are 1/hx, 1/hy and 1/hz.
+         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1,  0,  0 >(); 
+         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >(); 
+         const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >(); 
+         // Indices of the cell itself and of its neighbours at unit offsets
+         // along +/- x ( east/west ), +/- y ( north/south ) and +/- z ( up/down ).
+         const IndexType& center = entity.getIndex(); 
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
+         
+         // Velocity component 0 sampled at west/east, component 1 at
+         // north/south and component 2 at up/down.
+         const RealType& velocity_x_west  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
+         const RealType& velocity_x_east  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
+         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
+         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];
+         const RealType& velocity_z_up    = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ up ];
+         const RealType& velocity_z_down  = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ down ];
+         
+         // First term: ( sum of the six neighbours - 6 * center ) scaled by
+         // artificialViscosity / ( 6 * tau ).
+         // Second term: half of the summed per-axis differences of
+         // u * velocity, each multiplied by the matching inverse space step.
+         return 1.0 / ( 6.0 * this->tau ) * this->artificialViscosity *
+                ( u[ west ] + u[ east ] + u[ south ] + u[ north ] + u[ up ] + u[ down ]- 6.0 * u[ center ] ) 
+                - 0.5 * ( ( u[ east ] * velocity_x_east - u[ west ] * velocity_x_west ) * hxInverse
+                        + ( u[ north ] * velocity_y_north - u[ south ] * velocity_y_south ) * hyInverse
+                        + ( u[ up ] * velocity_z_up - u[ down ] * velocity_z_down ) * hzInverse );
+         
+      }
+
+      // The linear-system interface below is commented out, i.e. currently disabled.
+      /*template< typename MeshEntity >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const MeshEntity& entity ) const;
+
+      template< typename MeshEntity, typename Vector, typename MatrixRow >
+      __cuda_callable__
+      void updateLinearSystem( const RealType& time,
+                               const RealType& tau,
+                               const MeshType& mesh,
+                               const IndexType& index,
+                               const MeshEntity& entity,
+                               const MeshFunctionType& u,
+                               Vector& b,
+                               MatrixRow& matrixRow ) const;*/
+};
+
+
+} //namespace TNL
diff --git a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c6791cd17516d877a206adff346900fc80d2462
--- /dev/null
+++ b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h
@@ -0,0 +1,309 @@
+/***************************************************************************
+                          LaxFridrichsEnergy.h  -  description
+                             -------------------
+    begin                : Feb 17, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Meshes/Grid.h>
+
+namespace TNL {
+   
+// Common state shared by the LaxFridrichsEnergy specializations: the time step
+// tau, the velocity field, the pressure and the artificial-viscosity
+// coefficient, together with their setters.
+//
+// NOTE(review): this header includes only TNL/Containers/Vector.h and
+// TNL/Meshes/Grid.h although Functions::MeshFunction, Functions::VectorField,
+// Pointers::SharedPointer and String are used below — presumably the including
+// file provides them; confirm.
+template< typename Mesh,
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::IndexType >
+class LaxFridrichsEnergyBase
+{
+   public:
+
+      typedef Real RealType;
+      typedef Index IndexType;
+      typedef Mesh MeshType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef typename MeshType::CoordinatesType CoordinatesType;
+      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
+      static const int Dimensions = MeshType::getMeshDimension();
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
+      typedef Pointers::SharedPointer<  MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType > VelocityFieldPointer;
+
+      // tau is zero-initialized so that it never holds an indeterminate value.
+      // The specializations divide by tau, so setTau() must still be called
+      // with a non-zero value before operator() is evaluated.
+      // The artificial viscosity defaults to 1.0.
+      LaxFridrichsEnergyBase()
+       : tau( 0.0 ), artificialViscosity( 1.0 ) {}
+
+      // Returns the type name in the form
+      // "LaxFridrichsEnergy< <mesh type>, <Real>, <Index> >".
+      static String getType()
+      {
+         return String( "LaxFridrichsEnergy< " ) +
+             MeshType::getType() + ", " +
+             TNL::getType< Real >() + ", " +
+             TNL::getType< Index >() + " >";
+      }
+
+      // Stores the time step used to scale the artificial-viscosity term.
+      void setTau( const Real& tau )
+      {
+         this->tau = tau;
+      }
+
+      // Stores the shared pointer to the velocity field read by operator().
+      void setVelocity( const VelocityFieldPointer& velocity )
+      {
+         this->velocity = velocity;
+      }
+
+      // Stores the shared pointer to the pressure mesh function read by operator().
+      void setPressure( const MeshFunctionPointer& pressure )
+      {
+         this->pressure = pressure;
+      }
+
+      // Overrides the artificial-viscosity coefficient (default 1.0).
+      void setArtificialViscosity( const RealType& artificialViscosity )
+      {
+         this->artificialViscosity = artificialViscosity;
+      }
+
+   protected:
+
+      RealType tau;
+
+      VelocityFieldPointer velocity;
+
+      MeshFunctionPointer pressure;
+
+      RealType artificialViscosity;
+};
+   
+// Primary template of LaxFridrichsEnergy. It is empty; the implementations in
+// this header are the partial specializations for
+// Meshes::Grid< 1 | 2 | 3, ... >.
+template< typename Mesh,
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::IndexType >
+class LaxFridrichsEnergy
+{
+};
+
+// Partial specialization of LaxFridrichsEnergy for 1D grids
+// ( Meshes::Grid< 1, ... > ).
+//
+// operator() evaluates, for a single cell, an artificial-viscosity term built
+// from the west and east neighbours minus a centered difference of
+// ( e + pressure ) * velocity_x. tau, velocity, pressure and
+// artificialViscosity are the protected members of LaxFridrichsEnergyBase.
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class LaxFridrichsEnergy< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
+   : public LaxFridrichsEnergyBase< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >
+{
+   public:
+
+      typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
+      typedef LaxFridrichsEnergyBase< MeshType, Real, Index > BaseType;
+      
+      // Re-export the base-class names so they can be used unqualified here.
+      using typename BaseType::RealType;
+      using typename BaseType::IndexType;
+      using typename BaseType::DeviceType;
+      using typename BaseType::CoordinatesType;
+      using typename BaseType::MeshFunctionType;
+      using typename BaseType::MeshFunctionPointer;
+      using typename BaseType::VelocityFieldType;
+      using typename BaseType::VelocityFieldPointer;
+      using BaseType::Dimensions;      
+      
+      // Evaluates the operator at one grid cell.
+      //
+      //   e      - mesh function indexed by cell index (presumably the energy,
+      //            going by the class name — TODO confirm)
+      //   entity - cell at which the operator is evaluated; its neighbour
+      //            entities provide the stencil indices
+      //   time   - not used by this implementation
+      //
+      // Returns the viscosity term minus half of the east-west difference of
+      // ( e + pressure ) * velocity_x multiplied by hxInverse.
+      // NOTE(review): no boundary handling is visible here; presumably this is
+      // only called for cells that have both neighbours — confirm with caller.
+      template< typename MeshFunction, typename MeshEntity >
+      __cuda_callable__
+      Real operator()( const MeshFunction& e,
+                       const MeshEntity& entity,
+                       const RealType& time = 0.0 ) const
+      {
+         // Both the entity and the entities e is defined on must have dimension 1.
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
+
+         // Space-step product with exponent -1; by its name this is 1/hx.
+         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
+         // Indices of the cell itself and of its neighbours at offsets +1 and -1.
+         const IndexType& center = entity.getIndex(); 
+         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
+         // Pressure and velocity component 0 sampled at the two neighbours.
+         const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
+         const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
+         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
+         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
+         // First term: ( west - 2 * center + east ) scaled by
+         // artificialViscosity / ( 2 * tau ).
+         // Second term: half of the east-west difference of
+         // ( e + pressure ) * velocity_x, multiplied by hxInverse.
+         return 1.0 / ( 2.0 * this->tau ) * this->artificialViscosity * ( e[ west ] - 2.0 * e[ center ]  + e[ east ] ) 
+                - 0.5 * ( ( e[ east ] + pressure_east ) * velocity_x_east  
+                        - ( e[ west ] + pressure_west ) * velocity_x_west ) * hxInverse;
+  
+      }
+
+      // The linear-system interface below is commented out, i.e. currently disabled.
+      /*template< typename MeshEntity >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const MeshEntity& entity ) const;
+
+      template< typename MeshEntity, typename Vector, typename MatrixRow >
+      __cuda_callable__
+      void updateLinearSystem( const RealType& time,
+                               const RealType& tau,
+                               const MeshType& mesh,
+                               const IndexType& index,
+                               const MeshEntity& entity,
+                               const MeshFunctionType& u,
+                               Vector& b,
+                               MatrixRow& matrixRow ) const;*/
+};
+
+// Partial specialization of LaxFridrichsEnergy for 2D grids
+// ( Meshes::Grid< 2, ... > ).
+//
+// operator() evaluates, for a single cell, an artificial-viscosity term built
+// from the four axis-aligned neighbours minus centered differences of
+// ( e + pressure ) * velocity along x and y. tau, velocity, pressure and
+// artificialViscosity are the protected members of LaxFridrichsEnergyBase.
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class LaxFridrichsEnergy< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
+   : public LaxFridrichsEnergyBase< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >
+{
+   public:
+      typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
+      typedef LaxFridrichsEnergyBase< MeshType, Real, Index > BaseType;
+      
+      // Re-export the base-class names so they can be used unqualified here.
+      using typename BaseType::RealType;
+      using typename BaseType::IndexType;
+      using typename BaseType::DeviceType;
+      using typename BaseType::CoordinatesType;
+      using typename BaseType::MeshFunctionType;
+      using typename BaseType::MeshFunctionPointer;
+      using typename BaseType::VelocityFieldType;
+      using typename BaseType::VelocityFieldPointer;
+      using BaseType::Dimensions;
+      
+
+      // Evaluates the operator at one grid cell.
+      //
+      //   e      - mesh function indexed by cell index (presumably the energy,
+      //            going by the class name — TODO confirm)
+      //   entity - cell at which the operator is evaluated; its neighbour
+      //            entities provide the stencil indices
+      //   time   - not used by this implementation
+      //
+      // Returns the viscosity term minus half of the summed x and y
+      // differences of ( e + pressure ) * velocity.
+      // NOTE(review): no boundary handling is visible here; presumably this is
+      // only called for cells that have all four neighbours — confirm with caller.
+      template< typename MeshFunction, typename MeshEntity >
+      __cuda_callable__
+      Real operator()( const MeshFunction& e,
+                       const MeshEntity& entity,
+                       const RealType& time = 0.0 ) const
+      {
+         // Both the entity and the entities e is defined on must have dimension 2.
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
+ 
+         // Space-step products with exponents ( -1, 0 ) and ( 0, -1 ); by their
+         // names these are 1/hx and 1/hy.
+         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
+         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
+         // Indices of the cell itself and of its neighbours at offsets
+         // ( +1, 0 ), ( -1, 0 ), ( 0, +1 ) and ( 0, -1 ).
+         const IndexType& center = entity.getIndex(); 
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
+         // Pressure sampled at each of the four neighbours.
+         const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
+         const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
+         const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
+         const RealType& pressure_south = this->pressure.template getData< DeviceType >()[ south ];
+         // Velocity component 0 sampled at east/west, component 1 at north/south.
+         const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
+         const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
+         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
+         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];         
+         
+         // First term: ( sum of the four neighbours - 4 * center ) scaled by
+         // artificialViscosity / ( 4 * tau ).
+         // Second term: half of the summed per-axis differences of
+         // ( e + pressure ) * velocity, each multiplied by the matching
+         // inverse space step.
+         return 1.0 / ( 4.0 * this->tau ) * this->artificialViscosity * ( e[ west ] + e[ east ] + e[ south ] + e[ north ] - 4.0 * e[ center ] ) 
+                - 0.5 * ( ( ( ( e[ east ] + pressure_east ) * velocity_x_east )
+                          -( ( e[ west ] + pressure_west ) * velocity_x_west ) ) * hxInverse
+                        + ( ( ( e[ north ] + pressure_north ) * velocity_y_north )
+                          -( ( e[ south ] + pressure_south ) * velocity_y_south ) ) * hyInverse );
+      }
+
+      // The linear-system interface below is commented out, i.e. currently disabled.
+      /*template< typename MeshEntity >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const MeshEntity& entity ) const;
+
+      template< typename MeshEntity, typename Vector, typename MatrixRow >
+      __cuda_callable__
+      void updateLinearSystem( const RealType& time,
+                               const RealType& tau,
+                               const MeshType& mesh,
+                               const IndexType& index,
+                               const MeshEntity& entity,
+                               const MeshFunctionType& u,
+                               Vector& b,
+                               MatrixRow& matrixRow ) const;*/
+};
+
+// Partial specialization of LaxFridrichsEnergy for 3D grids
+// ( Meshes::Grid< 3, ... > ).
+//
+// operator() evaluates, for a single cell, an artificial-viscosity term built
+// from the six axis-aligned neighbours minus centered differences of
+// ( e + pressure ) * velocity along x, y and z. tau, velocity, pressure and
+// artificialViscosity are the protected members of LaxFridrichsEnergyBase.
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class LaxFridrichsEnergy< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
+   : public LaxFridrichsEnergyBase< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >
+{
+   public:
+      typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
+      typedef LaxFridrichsEnergyBase< MeshType, Real, Index > BaseType;
+      
+      // Re-export the base-class names so they can be used unqualified here.
+      using typename BaseType::RealType;
+      using typename BaseType::IndexType;
+      using typename BaseType::DeviceType;
+      using typename BaseType::CoordinatesType;
+      using typename BaseType::MeshFunctionType;
+      using typename BaseType::MeshFunctionPointer;
+      using typename BaseType::VelocityFieldType;
+      using typename BaseType::VelocityFieldPointer;
+      using BaseType::Dimensions;      
+
+      // Evaluates the operator at one grid cell.
+      //
+      //   e      - mesh function indexed by cell index (presumably the energy,
+      //            going by the class name — TODO confirm)
+      //   entity - cell at which the operator is evaluated; its neighbour
+      //            entities provide the stencil indices
+      //   time   - not used by this implementation
+      //
+      // Returns the viscosity term minus half of the summed x, y and z
+      // differences of ( e + pressure ) * velocity.
+      // NOTE(review): no boundary handling is visible here; presumably this is
+      // only called for cells that have all six neighbours — confirm with caller.
+      template< typename MeshFunction, typename MeshEntity >
+      __cuda_callable__
+      Real operator()( const MeshFunction& e,
+                       const MeshEntity& entity,
+                       const RealType& time = 0.0 ) const
+      {
+         // Both the entity and the entities e is defined on must have dimension 3.
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
+ 
+         // Space-step products with a single -1 exponent per axis; by their
+         // names these are 1/hx, 1/hy and 1/hz.
+         const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0,  0 >(); 
+         const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1,  0 >(); 
+         const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts< 0,  0, -1 >(); 
+         // Indices of the cell itself and of its neighbours at unit offsets
+         // along +/- x ( east/west ), +/- y ( north/south ) and +/- z ( up/down ).
+         const IndexType& center = entity.getIndex(); 
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
+         
+         // Pressure sampled at each of the six neighbours.
+         const RealType& pressure_west  = this->pressure.template getData< DeviceType >()[ west ];
+         const RealType& pressure_east  = this->pressure.template getData< DeviceType >()[ east ];
+         const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
+         const RealType& pressure_south = this->pressure.template getData< DeviceType >()[ south ];
+         const RealType& pressure_up    = this->pressure.template getData< DeviceType >()[ up ];
+         const RealType& pressure_down  = this->pressure.template getData< DeviceType >()[ down ];
+         
+         // Velocity component 0 sampled at east/west, component 1 at
+         // north/south and component 2 at up/down.
+         const RealType& velocity_x_east  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
+         const RealType& velocity_x_west  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
+         const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
+         const RealType& velocity_y_south = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ south ];
+         const RealType& velocity_z_up    = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ up ];
+         const RealType& velocity_z_down  = this->velocity.template getData< DeviceType >()[ 2 ].template getData< DeviceType >()[ down ];         
+         
+         // First term: ( sum of the six neighbours - 6 * center ) scaled by
+         // artificialViscosity / ( 6 * tau ).
+         // Second term: half of the summed per-axis differences of
+         // ( e + pressure ) * velocity, each multiplied by the matching
+         // inverse space step.
+         return 1.0 / ( 6.0 * this->tau ) * this->artificialViscosity *
+                 ( e[ west ] + e[ east ] + e[ south ] + e[ north ] + e[ up ] + e[ down ] - 6.0 * e[ center ] ) 
+                - 0.5 * ( ( ( ( e[ east ] + pressure_east ) * velocity_x_east )
+                           -( ( e[ west ] + pressure_west ) * velocity_x_west ) ) * hxInverse
+                        + ( ( ( e[ north ] + pressure_north ) * velocity_y_north )
+                           -( ( e[ south ] + pressure_south ) * velocity_y_south ) ) * hyInverse
+                        + ( ( ( e[ up ] + pressure_up ) * velocity_z_up )
+                           -( ( e[ down ] + pressure_down ) * velocity_z_down ) ) * hzInverse );
+      }
+
+      // The linear-system interface below is commented out, i.e. currently disabled.
+      /*template< typename MeshEntity >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const MeshEntity& entity ) const;
+
+      template< typename MeshEntity, typename Vector, typename MatrixRow >
+      __cuda_callable__
+      void updateLinearSystem( const RealType& time,
+                               const RealType& tau,
+                               const MeshType& mesh,
+                               const IndexType& index,
+                               const MeshEntity& entity,
+                               const MeshFunctionType& u,
+                               Vector& b,
+                               MatrixRow& matrixRow ) const;*/
+};
+
+} //namespace TNL
diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumBase.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumBase.h
new file mode 100644
index 0000000000000000000000000000000000000000..96488da1a8acc65b6c7a07f04fc0f835bba5282f
--- /dev/null
+++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumBase.h
@@ -0,0 +1,68 @@
+/***************************************************************************
+                          LaxFridrichsMomentumBase.h  -  description
+                             -------------------
+    begin                : Feb 17, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+
+#pragma once
+
+namespace TNL {
+
+// Common state for the Lax-Friedrichs momentum operators: the time step tau,
+// the velocity field, the pressure and the artificial-viscosity coefficient,
+// together with their setters.
+//
+// NOTE(review): this header has no #include directives although it uses
+// Functions::MeshFunction, Functions::VectorField and Pointers::SharedPointer —
+// presumably the including file provides them; confirm.
+template< typename Mesh,
+          typename Real = typename Mesh::RealType,
+          typename Index = typename Mesh::IndexType >
+class LaxFridrichsMomentumBase
+{
+   public:
+
+      typedef Real RealType;
+      typedef Index IndexType;
+      typedef Mesh MeshType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef typename MeshType::CoordinatesType CoordinatesType;
+      typedef Functions::MeshFunction< MeshType > MeshFunctionType;
+      static const int Dimensions = MeshType::getMeshDimension();
+      typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
+      typedef Pointers::SharedPointer<  MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType > VelocityFieldPointer;
+
+      // tau is zero-initialized so that it never holds an indeterminate value.
+      // NOTE(review): the operators built on this base are not visible here; if
+      // they divide by tau, setTau() must be called with a non-zero value
+      // before they are evaluated — confirm.
+      // The artificial viscosity defaults to 1.0.
+      LaxFridrichsMomentumBase()
+       : tau( 0.0 ), artificialViscosity( 1.0 ) {}
+
+      // Stores the time step.
+      void setTau( const Real& tau )
+      {
+         this->tau = tau;
+      }
+
+      // Stores the shared pointer to the velocity field.
+      void setVelocity( const VelocityFieldPointer& velocity )
+      {
+         this->velocity = velocity;
+      }
+
+      // Stores the shared pointer to the pressure mesh function.
+      void setPressure( const MeshFunctionPointer& pressure )
+      {
+         this->pressure = pressure;
+      }
+
+      // Overrides the artificial-viscosity coefficient (default 1.0).
+      void setArtificialViscosity( const RealType& artificialViscosity )
+      {
+         this->artificialViscosity = artificialViscosity;
+      }
+
+   protected:
+
+      RealType tau;
+
+      VelocityFieldPointer velocity;
+
+      MeshFunctionPointer pressure;
+
+      RealType artificialViscosity;
+};
+
+} //namespace TNL
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h
similarity index 100%
rename from examples/inviscid-flow/LaxFridrichsMomentumX.h
rename to src/Examples/inviscid-flow/LaxFridrichsMomentumX.h
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h
similarity index 100%
rename from examples/inviscid-flow/LaxFridrichsMomentumY.h
rename to src/Examples/inviscid-flow/LaxFridrichsMomentumY.h
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h
similarity index 100%
rename from examples/inviscid-flow/LaxFridrichsMomentumZ.h
rename to src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h
diff --git a/src/Examples/inviscid-flow/PhysicalVariablesGetter.h b/src/Examples/inviscid-flow/PhysicalVariablesGetter.h
new file mode 100644
index 0000000000000000000000000000000000000000..a391d2d6477327d2a7058354212a9c85431173c8
--- /dev/null
+++ b/src/Examples/inviscid-flow/PhysicalVariablesGetter.h
@@ -0,0 +1,122 @@
+/***************************************************************************
+                          PhysicalVariablesGetter.h  -  description
+                             -------------------
+    begin                : Feb 12, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/VectorField.h>
+#include <TNL/Functions/MeshFunctionEvaluator.h>
+#include "CompressibleConservativeVariables.h"
+
+namespace TNL {
+   
+// Computes velocity and pressure mesh functions from a set of compressible
+// conservative variables (density, momentum, energy) defined on Mesh.
+template< typename Mesh >
+class PhysicalVariablesGetter
+{
+   public:
+
+      using MeshType = Mesh;
+      using RealType = typename MeshType::RealType;
+      using DeviceType = typename MeshType::DeviceType;
+      using IndexType = typename MeshType::IndexType;
+      static const int Dimensions = MeshType::getMeshDimension();
+
+      using MeshFunctionType = Functions::MeshFunction< MeshType >;
+      using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
+      using ConservativeVariablesType = CompressibleConservativeVariables< MeshType >;
+      using ConservativeVariablesPointer = Pointers::SharedPointer< ConservativeVariablesType >;
+      using VelocityFieldType = Functions::VectorField< Dimensions, MeshFunctionType >;
+      using VelocityFieldPointer = Pointers::SharedPointer< VelocityFieldType >;
+
+      // Functor returning momentum / density at a mesh entity, or 0 where the
+      // density is exactly zero.
+      class VelocityGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
+      {
+         public:
+            using RealType = typename MeshType::RealType;
+
+            VelocityGetter( MeshFunctionPointer density,
+                            MeshFunctionPointer momentum )
+            : density( density ), momentum( momentum ) {}
+
+            // The time argument is accepted but not used.
+            template< typename EntityType >
+            __cuda_callable__
+            RealType operator()( const EntityType& meshEntity,
+                                 const RealType& time = 0.0 ) const
+            {
+               const auto& densityValue = density.template getData< DeviceType >()( meshEntity );
+               // Guard against division by zero.
+               if( densityValue == 0.0 )
+                  return 0;
+               return momentum.template getData< DeviceType >()( meshEntity ) /
+                      densityValue;
+            }
+
+         protected:
+            const MeshFunctionPointer density;
+            const MeshFunctionPointer momentum;
+      };
+
+      // Functor returning ( gamma - 1 ) * ( e - 0.5 * |momentum|^2 / rho ) at a
+      // mesh entity, or 0 where the density is exactly zero.
+      class PressureGetter : public Functions::Domain< Dimensions, Functions::MeshDomain >
+      {
+         public:
+            using RealType = typename MeshType::RealType;
+
+            PressureGetter( MeshFunctionPointer density,
+                            MeshFunctionPointer energy,
+                            VelocityFieldPointer momentum,
+                            const RealType& gamma )
+            : density( density ), energy( energy ), momentum( momentum ), gamma( gamma ) {}
+
+            // The time argument is accepted but not used.
+            template< typename EntityType >
+            __cuda_callable__
+            RealType operator()( const EntityType& meshEntity,
+                                 const RealType& time = 0.0 ) const
+            {
+               const RealType energyValue = energy.template getData< DeviceType >()( meshEntity );
+               const RealType densityValue = density.template getData< DeviceType >()( meshEntity );
+               const RealType momentumMagnitude = momentum.template getData< DeviceType >().getVector( meshEntity ).lpNorm( 2.0 );
+               // Guard against division by zero.
+               if( densityValue == 0.0 )
+                  return 0;
+               return ( gamma - 1.0 ) * ( energyValue - 0.5 * momentumMagnitude * momentumMagnitude / densityValue );
+            }
+
+         protected:
+            const MeshFunctionPointer density;
+            const MeshFunctionPointer energy;
+            const VelocityFieldPointer momentum;
+            const RealType gamma;
+      };
+
+      // Fills each component of velocity by evaluating a VelocityGetter built
+      // from the density and the matching momentum component.
+      void getVelocity( const ConservativeVariablesPointer& conservativeVariables,
+                        VelocityFieldPointer& velocity )
+      {
+         Functions::MeshFunctionEvaluator< MeshFunctionType, VelocityGetter > evaluator;
+         for( int component = 0; component < Dimensions; ++component )
+         {
+            Pointers::SharedPointer< VelocityGetter, DeviceType > componentGetter(
+               conservativeVariables->getDensity(),
+               ( *conservativeVariables->getMomentum() )[ component ] );
+            evaluator.evaluate( ( *velocity )[ component ], componentGetter );
+         }
+      }
+
+      // Fills pressure by evaluating a PressureGetter built from the density,
+      // energy and momentum of conservativeVariables and the given gamma.
+      void getPressure( const ConservativeVariablesPointer& conservativeVariables,
+                        const RealType& gamma,
+                        MeshFunctionPointer& pressure )
+      {
+         Functions::MeshFunctionEvaluator< MeshFunctionType, PressureGetter > evaluator;
+         Pointers::SharedPointer< PressureGetter, DeviceType > pressureFunctor(
+            conservativeVariables->getDensity(),
+            conservativeVariables->getEnergy(),
+            conservativeVariables->getMomentum(),
+            gamma );
+         evaluator.evaluate( pressure, pressureFunctor );
+      }
+
+};
+   
+} //namespace TNL
diff --git a/examples/inviscid-flow/RiemannProblemInitialCondition.h b/src/Examples/inviscid-flow/RiemannProblemInitialCondition.h
similarity index 99%
rename from examples/inviscid-flow/RiemannProblemInitialCondition.h
rename to src/Examples/inviscid-flow/RiemannProblemInitialCondition.h
index b1819b1eb86c014f1689945b1dcac44b6cbdbfcd..a3d54c747562b04f0b7a11dccb472f855dcf9849 100644
--- a/examples/inviscid-flow/RiemannProblemInitialCondition.h
+++ b/src/Examples/inviscid-flow/RiemannProblemInitialCondition.h
@@ -38,7 +38,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 1,MeshReal, Device, Me
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -195,7 +195,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 2, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -415,7 +415,7 @@ class RiemannProblemInitialConditionSetter< Meshes::Grid< 3, MeshReal, Device, M
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
 //       for cyklus i = 0 to mesh.getDimensions().x() j pro .y() a k pro .z()
 //       typedef typename MeshType::Cell CellType
@@ -768,7 +768,7 @@ class RiemannProblemInitialCondition
       static const int Dimensions = MeshType::getMeshDimension();
       typedef Containers::StaticVector< Dimensions, RealType > PointType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  MeshFunctionType > MeshFunctionPointer;
       typedef Functions::VectorField< Dimensions, MeshType > VectorFieldType;
       
       RiemannProblemInitialCondition()
@@ -1342,7 +1342,7 @@ class RiemannProblemInitialCondition
          typedef Functions::Analytic::VectorNorm< Dimensions, RealType > VectorNormType;
          typedef Operators::Analytic::Sign< Dimensions, RealType > SignType;
          typedef Functions::OperatorFunction< SignType, VectorNormType > InitialConditionType;
-         typedef SharedPointer< InitialConditionType, DeviceType > InitialConditionPointer;
+         typedef Pointers::SharedPointer<  InitialConditionType, DeviceType > InitialConditionPointer;
          
          InitialConditionPointer initialCondition;
          initialCondition->getFunction().setCenter( center );
diff --git a/examples/inviscid-flow/euler.cpp b/src/Examples/inviscid-flow/euler.cpp
similarity index 100%
rename from examples/inviscid-flow/euler.cpp
rename to src/Examples/inviscid-flow/euler.cpp
diff --git a/examples/inviscid-flow/euler.cu b/src/Examples/inviscid-flow/euler.cu
similarity index 100%
rename from examples/inviscid-flow/euler.cu
rename to src/Examples/inviscid-flow/euler.cu
diff --git a/examples/inviscid-flow/euler.h b/src/Examples/inviscid-flow/euler.h
similarity index 100%
rename from examples/inviscid-flow/euler.h
rename to src/Examples/inviscid-flow/euler.h
diff --git a/examples/inviscid-flow/eulerBuildConfigTag.h b/src/Examples/inviscid-flow/eulerBuildConfigTag.h
similarity index 100%
rename from examples/inviscid-flow/eulerBuildConfigTag.h
rename to src/Examples/inviscid-flow/eulerBuildConfigTag.h
diff --git a/examples/inviscid-flow/eulerProblem.h b/src/Examples/inviscid-flow/eulerProblem.h
similarity index 85%
rename from examples/inviscid-flow/eulerProblem.h
rename to src/Examples/inviscid-flow/eulerProblem.h
index cb24108ef4bccce4dacb10b7cd1749c8da3f4421..a854f8098e751d65d9f1e542c540d653b1fb08c1 100644
--- a/examples/inviscid-flow/eulerProblem.h
+++ b/src/Examples/inviscid-flow/eulerProblem.h
@@ -49,12 +49,12 @@ class eulerProblem:
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef CompressibleConservativeVariables< MeshType > ConservativeVariablesType;
       typedef Functions::VectorField< Dimensions, MeshFunctionType > VelocityFieldType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< ConservativeVariablesType > ConservativeVariablesPointer;
-      typedef SharedPointer< VelocityFieldType > VelocityFieldPointer;
-      typedef SharedPointer< InviscidOperators > InviscidOperatorsPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer<  MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  ConservativeVariablesType > ConservativeVariablesPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer<  InviscidOperators > InviscidOperatorsPointer;
+      typedef Pointers::SharedPointer<  BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
 
       static String getType();
 
@@ -84,6 +84,9 @@ class eulerProblem:
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      // Boundary-condition hook; its definition in eulerProblem_impl.h currently only binds `dofs` and asserts with a TODO.
+      void applyBoundaryConditions( const RealType& time, DofVectorPointer& dofs );
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/inviscid-flow/eulerProblem_impl.h b/src/Examples/inviscid-flow/eulerProblem_impl.h
similarity index 97%
rename from examples/inviscid-flow/eulerProblem_impl.h
rename to src/Examples/inviscid-flow/eulerProblem_impl.h
index 2da812edfbefa68100c466c2bb40f9efa5a0d5a5..7347755cb118832f86b1015dac5d16428294ad3f 100644
--- a/examples/inviscid-flow/eulerProblem_impl.h
+++ b/src/Examples/inviscid-flow/eulerProblem_impl.h
@@ -313,6 +313,21 @@ getExplicitUpdate( const RealType& time,
 
 }
 
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename Communicator,
+          typename InviscidOperators >
+void
+eulerProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, InviscidOperators >::
+applyBoundaryConditions( const RealType& time,
+                         DofVectorPointer& dofs )
+{
+   this->bindDofs( dofs );  // NOTE(review): `time` is not used by this stub
+   TNL_ASSERT( false, "TODO: implement BC ... see HeatEquationProblem" );
+   // NOTE(review): placeholder - no boundary conditions are applied; if TNL_ASSERT compiles out in release builds this is a silent no-op (confirm).
+}
+
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/examples/inviscid-flow/eulerRhs.h b/src/Examples/inviscid-flow/eulerRhs.h
similarity index 100%
rename from examples/inviscid-flow/eulerRhs.h
rename to src/Examples/inviscid-flow/eulerRhs.h
diff --git a/examples/inviscid-flow/run-euler b/src/Examples/inviscid-flow/run-euler
similarity index 100%
rename from examples/inviscid-flow/run-euler
rename to src/Examples/inviscid-flow/run-euler
diff --git a/examples/mean-curvature-flow/CMakeLists.txt b/src/Examples/mean-curvature-flow/CMakeLists.txt
similarity index 100%
rename from examples/mean-curvature-flow/CMakeLists.txt
rename to src/Examples/mean-curvature-flow/CMakeLists.txt
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cpp b/src/Examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cpp
similarity index 100%
rename from examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cpp
rename to src/Examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cpp
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cu b/src/Examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cu
similarity index 100%
rename from examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cu
rename to src/Examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.cu
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h b/src/Examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h
similarity index 100%
rename from examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h
rename to src/Examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow.cpp b/src/Examples/mean-curvature-flow/tnl-mean-curvature-flow.cpp
similarity index 100%
rename from examples/mean-curvature-flow/tnl-mean-curvature-flow.cpp
rename to src/Examples/mean-curvature-flow/tnl-mean-curvature-flow.cpp
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow.cu b/src/Examples/mean-curvature-flow/tnl-mean-curvature-flow.cu
similarity index 100%
rename from examples/mean-curvature-flow/tnl-mean-curvature-flow.cu
rename to src/Examples/mean-curvature-flow/tnl-mean-curvature-flow.cu
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow.h b/src/Examples/mean-curvature-flow/tnl-mean-curvature-flow.h
similarity index 100%
rename from examples/mean-curvature-flow/tnl-mean-curvature-flow.h
rename to src/Examples/mean-curvature-flow/tnl-mean-curvature-flow.h
diff --git a/examples/mean-curvature-flow/tnl-run-mean-curvature-flow b/src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow
similarity index 100%
rename from examples/mean-curvature-flow/tnl-run-mean-curvature-flow
rename to src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow
diff --git a/examples/mean-curvature-flow/tnl-run-mean-curvature-flow-contour-video b/src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow-contour-video
similarity index 100%
rename from examples/mean-curvature-flow/tnl-run-mean-curvature-flow-contour-video
rename to src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow-contour-video
diff --git a/examples/mean-curvature-flow/tnl-run-mean-curvature-flow-eoc-test b/src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow-eoc-test
similarity index 100%
rename from examples/mean-curvature-flow/tnl-run-mean-curvature-flow-eoc-test
rename to src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow-eoc-test
diff --git a/examples/mean-curvature-flow/tnl-run-mean-curvature-flow-videos b/src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow-videos
similarity index 100%
rename from examples/mean-curvature-flow/tnl-run-mean-curvature-flow-videos
rename to src/Examples/mean-curvature-flow/tnl-run-mean-curvature-flow-videos
diff --git a/examples/narrow-band/CMakeLists.txt b/src/Examples/narrow-band/CMakeLists.txt
similarity index 100%
rename from examples/narrow-band/CMakeLists.txt
rename to src/Examples/narrow-band/CMakeLists.txt
diff --git a/examples/narrow-band/MainBuildConfig.h b/src/Examples/narrow-band/MainBuildConfig.h
similarity index 100%
rename from examples/narrow-band/MainBuildConfig.h
rename to src/Examples/narrow-band/MainBuildConfig.h
diff --git a/examples/narrow-band/main.cpp b/src/Examples/narrow-band/main.cpp
similarity index 100%
rename from examples/narrow-band/main.cpp
rename to src/Examples/narrow-band/main.cpp
diff --git a/examples/narrow-band/main.cu b/src/Examples/narrow-band/main.cu
similarity index 100%
rename from examples/narrow-band/main.cu
rename to src/Examples/narrow-band/main.cu
diff --git a/examples/narrow-band/main.h b/src/Examples/narrow-band/main.h
similarity index 100%
rename from examples/narrow-band/main.h
rename to src/Examples/narrow-band/main.h
diff --git a/examples/narrow-band/narrowBandConfig.h b/src/Examples/narrow-band/narrowBandConfig.h
similarity index 100%
rename from examples/narrow-band/narrowBandConfig.h
rename to src/Examples/narrow-band/narrowBandConfig.h
diff --git a/examples/narrow-band/tnlNarrowBand.h b/src/Examples/narrow-band/tnlNarrowBand.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand.h
rename to src/Examples/narrow-band/tnlNarrowBand.h
diff --git a/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
rename to src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
diff --git a/examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h b/src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
rename to src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
diff --git a/examples/narrow-band/tnlNarrowBand2D_impl.h b/src/Examples/narrow-band/tnlNarrowBand2D_impl.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand2D_impl.h
rename to src/Examples/narrow-band/tnlNarrowBand2D_impl.h
diff --git a/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/src/Examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
rename to src/Examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
diff --git a/examples/narrow-band/tnlNarrowBand3D_impl.h b/src/Examples/narrow-band/tnlNarrowBand3D_impl.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand3D_impl.h
rename to src/Examples/narrow-band/tnlNarrowBand3D_impl.h
diff --git a/examples/narrow-band/tnlNarrowBand_CUDA.h b/src/Examples/narrow-band/tnlNarrowBand_CUDA.h
similarity index 100%
rename from examples/narrow-band/tnlNarrowBand_CUDA.h
rename to src/Examples/narrow-band/tnlNarrowBand_CUDA.h
diff --git a/examples/navier-stokes/CMakeLists.txt b/src/Examples/navier-stokes/CMakeLists.txt
similarity index 100%
rename from examples/navier-stokes/CMakeLists.txt
rename to src/Examples/navier-stokes/CMakeLists.txt
diff --git a/examples/navier-stokes/Makefile b/src/Examples/navier-stokes/Makefile
similarity index 100%
rename from examples/navier-stokes/Makefile
rename to src/Examples/navier-stokes/Makefile
diff --git a/examples/navier-stokes/main.cpp b/src/Examples/navier-stokes/main.cpp
similarity index 100%
rename from examples/navier-stokes/main.cpp
rename to src/Examples/navier-stokes/main.cpp
diff --git a/examples/navier-stokes/make-png-from-gnuplot b/src/Examples/navier-stokes/make-png-from-gnuplot
similarity index 100%
rename from examples/navier-stokes/make-png-from-gnuplot
rename to src/Examples/navier-stokes/make-png-from-gnuplot
diff --git a/examples/navier-stokes/make-png-vectors-from-gnuplot b/src/Examples/navier-stokes/make-png-vectors-from-gnuplot
similarity index 100%
rename from examples/navier-stokes/make-png-vectors-from-gnuplot
rename to src/Examples/navier-stokes/make-png-vectors-from-gnuplot
diff --git a/examples/navier-stokes/merge-figures b/src/Examples/navier-stokes/merge-figures
similarity index 100%
rename from examples/navier-stokes/merge-figures
rename to src/Examples/navier-stokes/merge-figures
diff --git a/examples/navier-stokes/navier-stokes.cfg.desc b/src/Examples/navier-stokes/navier-stokes.cfg.desc
similarity index 100%
rename from examples/navier-stokes/navier-stokes.cfg.desc
rename to src/Examples/navier-stokes/navier-stokes.cfg.desc
diff --git a/examples/navier-stokes/navierStokesBoundaryConditions.h b/src/Examples/navier-stokes/navierStokesBoundaryConditions.h
similarity index 100%
rename from examples/navier-stokes/navierStokesBoundaryConditions.h
rename to src/Examples/navier-stokes/navierStokesBoundaryConditions.h
diff --git a/examples/navier-stokes/navierStokesBoundaryConditions_impl.h b/src/Examples/navier-stokes/navierStokesBoundaryConditions_impl.h
similarity index 100%
rename from examples/navier-stokes/navierStokesBoundaryConditions_impl.h
rename to src/Examples/navier-stokes/navierStokesBoundaryConditions_impl.h
diff --git a/examples/navier-stokes/navierStokesSetter.h b/src/Examples/navier-stokes/navierStokesSetter.h
similarity index 100%
rename from examples/navier-stokes/navierStokesSetter.h
rename to src/Examples/navier-stokes/navierStokesSetter.h
diff --git a/examples/navier-stokes/navierStokesSetter_impl.h b/src/Examples/navier-stokes/navierStokesSetter_impl.h
similarity index 100%
rename from examples/navier-stokes/navierStokesSetter_impl.h
rename to src/Examples/navier-stokes/navierStokesSetter_impl.h
diff --git a/examples/navier-stokes/navierStokesSolver.h b/src/Examples/navier-stokes/navierStokesSolver.h
similarity index 100%
rename from examples/navier-stokes/navierStokesSolver.h
rename to src/Examples/navier-stokes/navierStokesSolver.h
diff --git a/examples/navier-stokes/navierStokesSolverMonitor.h b/src/Examples/navier-stokes/navierStokesSolverMonitor.h
similarity index 100%
rename from examples/navier-stokes/navierStokesSolverMonitor.h
rename to src/Examples/navier-stokes/navierStokesSolverMonitor.h
diff --git a/examples/navier-stokes/navierStokesSolverMonitor_impl.h b/src/Examples/navier-stokes/navierStokesSolverMonitor_impl.h
similarity index 100%
rename from examples/navier-stokes/navierStokesSolverMonitor_impl.h
rename to src/Examples/navier-stokes/navierStokesSolverMonitor_impl.h
diff --git a/examples/navier-stokes/navierStokesSolver_impl.h b/src/Examples/navier-stokes/navierStokesSolver_impl.h
similarity index 100%
rename from examples/navier-stokes/navierStokesSolver_impl.h
rename to src/Examples/navier-stokes/navierStokesSolver_impl.h
diff --git a/examples/navier-stokes/share/CMakeLists.txt b/src/Examples/navier-stokes/share/CMakeLists.txt
similarity index 100%
rename from examples/navier-stokes/share/CMakeLists.txt
rename to src/Examples/navier-stokes/share/CMakeLists.txt
diff --git a/examples/navier-stokes/share/examples/CMakeLists.txt b/src/Examples/navier-stokes/share/examples/CMakeLists.txt
similarity index 100%
rename from examples/navier-stokes/share/examples/CMakeLists.txt
rename to src/Examples/navier-stokes/share/examples/CMakeLists.txt
diff --git a/examples/navier-stokes/share/examples/cavity b/src/Examples/navier-stokes/share/examples/cavity
similarity index 100%
rename from examples/navier-stokes/share/examples/cavity
rename to src/Examples/navier-stokes/share/examples/cavity
diff --git a/examples/quad-test/.gitignore b/src/Examples/quad-test/.gitignore
similarity index 100%
rename from examples/quad-test/.gitignore
rename to src/Examples/quad-test/.gitignore
diff --git a/examples/quad-test/Makefile b/src/Examples/quad-test/Makefile
similarity index 100%
rename from examples/quad-test/Makefile
rename to src/Examples/quad-test/Makefile
diff --git a/examples/quad-test/main.cpp b/src/Examples/quad-test/main.cpp
similarity index 100%
rename from examples/quad-test/main.cpp
rename to src/Examples/quad-test/main.cpp
diff --git a/examples/quad-test/quad-test.cfg.desc b/src/Examples/quad-test/quad-test.cfg.desc
similarity index 100%
rename from examples/quad-test/quad-test.cfg.desc
rename to src/Examples/quad-test/quad-test.cfg.desc
diff --git a/tests/simple_examples/CMakeLists.txt b/src/Examples/simple-examples/CMakeLists.txt
similarity index 100%
rename from tests/simple_examples/CMakeLists.txt
rename to src/Examples/simple-examples/CMakeLists.txt
diff --git a/tests/simple_examples/large-meshfunction-example.cpp b/src/Examples/simple-examples/large-meshfunction-example.cpp
similarity index 100%
rename from tests/simple_examples/large-meshfunction-example.cpp
rename to src/Examples/simple-examples/large-meshfunction-example.cpp
diff --git a/tests/simple_examples/large-meshfunction-example.cu b/src/Examples/simple-examples/large-meshfunction-example.cu
similarity index 100%
rename from tests/simple_examples/large-meshfunction-example.cu
rename to src/Examples/simple-examples/large-meshfunction-example.cu
diff --git a/tests/simple_examples/large-meshfunction-example.h b/src/Examples/simple-examples/large-meshfunction-example.h
similarity index 89%
rename from tests/simple_examples/large-meshfunction-example.h
rename to src/Examples/simple-examples/large-meshfunction-example.h
index 6ab1c2f4a515148237fe23f23ebd9ee85b7e8f9a..dfb5354cced4b512dc19e143b2b4510618a77ca9 100644
--- a/tests/simple_examples/large-meshfunction-example.h
+++ b/src/Examples/simple-examples/large-meshfunction-example.h
@@ -48,18 +48,18 @@ int main(int argc, char ** argv)
   PointType origin(-0.5);
   PointType proportions(size);
  
-  SharedPointer<MeshType> gridPtr;
+  Pointers::SharedPointer<MeshType> gridPtr;
   gridPtr->setDimensions(proportions);
   gridPtr->setDomain(origin,proportions);
 
-  SharedPointer<MeshFunctionType> meshFunctionptr;
+  Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
   MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;
 
   DofType dof(gridPtr->template getEntitiesCount< Cell >());
   dof.setValue(0);  
   meshFunctionptr->bind(gridPtr,dof);  
   
-  SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+  Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
 
   for(int i=0;i<cycles;i++)
   {
diff --git a/examples/transport-equation/CMakeLists.txt b/src/Examples/transport-equation/CMakeLists.txt
similarity index 100%
rename from examples/transport-equation/CMakeLists.txt
rename to src/Examples/transport-equation/CMakeLists.txt
diff --git a/examples/transport-equation/tnl-run-transport-equation b/src/Examples/transport-equation/tnl-run-transport-equation
similarity index 100%
rename from examples/transport-equation/tnl-run-transport-equation
rename to src/Examples/transport-equation/tnl-run-transport-equation
diff --git a/examples/transport-equation/tnl-run-transport-equation-eoc b/src/Examples/transport-equation/tnl-run-transport-equation-eoc
similarity index 100%
rename from examples/transport-equation/tnl-run-transport-equation-eoc
rename to src/Examples/transport-equation/tnl-run-transport-equation-eoc
diff --git a/examples/transport-equation/tnl-transport-equation-eoc.cpp b/src/Examples/transport-equation/tnl-transport-equation-eoc.cpp
similarity index 100%
rename from examples/transport-equation/tnl-transport-equation-eoc.cpp
rename to src/Examples/transport-equation/tnl-transport-equation-eoc.cpp
diff --git a/examples/transport-equation/tnl-transport-equation-eoc.cu b/src/Examples/transport-equation/tnl-transport-equation-eoc.cu
similarity index 100%
rename from examples/transport-equation/tnl-transport-equation-eoc.cu
rename to src/Examples/transport-equation/tnl-transport-equation-eoc.cu
diff --git a/examples/transport-equation/tnl-transport-equation-eoc.h b/src/Examples/transport-equation/tnl-transport-equation-eoc.h
similarity index 100%
rename from examples/transport-equation/tnl-transport-equation-eoc.h
rename to src/Examples/transport-equation/tnl-transport-equation-eoc.h
diff --git a/examples/transport-equation/tnl-transport-equation.cpp b/src/Examples/transport-equation/tnl-transport-equation.cpp
similarity index 100%
rename from examples/transport-equation/tnl-transport-equation.cpp
rename to src/Examples/transport-equation/tnl-transport-equation.cpp
diff --git a/examples/transport-equation/tnl-transport-equation.cu b/src/Examples/transport-equation/tnl-transport-equation.cu
similarity index 100%
rename from examples/transport-equation/tnl-transport-equation.cu
rename to src/Examples/transport-equation/tnl-transport-equation.cu
diff --git a/examples/transport-equation/tnl-transport-equation.h b/src/Examples/transport-equation/tnl-transport-equation.h
similarity index 100%
rename from examples/transport-equation/tnl-transport-equation.h
rename to src/Examples/transport-equation/tnl-transport-equation.h
diff --git a/examples/transport-equation/transportEquationBuildConfigTag.h b/src/Examples/transport-equation/transportEquationBuildConfigTag.h
similarity index 100%
rename from examples/transport-equation/transportEquationBuildConfigTag.h
rename to src/Examples/transport-equation/transportEquationBuildConfigTag.h
diff --git a/examples/transport-equation/transportEquationProblem.h b/src/Examples/transport-equation/transportEquationProblem.h
similarity index 85%
rename from examples/transport-equation/transportEquationProblem.h
rename to src/Examples/transport-equation/transportEquationProblem.h
index 5c43973d7e863415444f7c72796edea6d36d3a64..b6aa381d5f1aca5fc004c6274a4128ac28a1c791 100644
--- a/examples/transport-equation/transportEquationProblem.h
+++ b/src/Examples/transport-equation/transportEquationProblem.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Problems/PDEProblem.h>
 #include <TNL/Functions/MeshFunction.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 using namespace TNL::Problems;
 
@@ -37,12 +37,12 @@ public PDEProblem< Mesh,
       typedef typename DifferentialOperator::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef PDEProblem< Mesh, Communicator, RealType, DeviceType, IndexType > BaseType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer<  MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  DifferentialOperator > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer<  BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
       typedef typename DifferentialOperator::VelocityFieldType VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType, DeviceType > VelocityFieldPointer;
 
       typedef Communicator CommunicatorType;
 
@@ -79,6 +79,9 @@ public PDEProblem< Mesh,
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      // Boundary-condition hook; its definition in transportEquationProblem_impl.h currently only binds `dofs` and asserts with a TODO.
+      void applyBoundaryConditions( const RealType& time, DofVectorPointer& dofs );
 
       template< typename Matrix >
       void assemblyLinearSystem( const RealType& time,
diff --git a/examples/transport-equation/transportEquationProblemEoc.h b/src/Examples/transport-equation/transportEquationProblemEoc.h
similarity index 78%
rename from examples/transport-equation/transportEquationProblemEoc.h
rename to src/Examples/transport-equation/transportEquationProblemEoc.h
index daffdef0adfada0dcaf43b6b492896b255b4c317..62f10e273f95938a48d28ba85230d7463c0db4ce 100644
--- a/examples/transport-equation/transportEquationProblemEoc.h
+++ b/src/Examples/transport-equation/transportEquationProblemEoc.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Problems/PDEProblem.h>
 #include <TNL/Functions/MeshFunction.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include "transportEquationProblem.h"
 
 using namespace TNL::Problems;
@@ -22,10 +22,10 @@ namespace TNL {
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
-          typename CommType,
+          typename Communicator,
           typename DifferentialOperator >
 class transportEquationProblemEoc:
-public transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, CommType, DifferentialOperator >
+public transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >
 {
    public:
 
@@ -33,16 +33,16 @@ public transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, CommTyp
       typedef typename Mesh::DeviceType DeviceType;
       typedef typename DifferentialOperator::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      typedef transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, CommType, DifferentialOperator > BaseType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator > BaseType;
+      typedef Pointers::SharedPointer<  MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  DifferentialOperator > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer<  BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
       typedef typename DifferentialOperator::VelocityFieldType VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       
-      typedef CommType CommunicatorType;
+      typedef Communicator CommunicatorType;
       using typename BaseType::MeshType;
       using typename BaseType::MeshPointer;
       using typename BaseType::DofVectorType;
diff --git a/examples/transport-equation/transportEquationProblemEoc_impl.h b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h
similarity index 97%
rename from examples/transport-equation/transportEquationProblemEoc_impl.h
rename to src/Examples/transport-equation/transportEquationProblemEoc_impl.h
index 2c893a6aa3932ff64764fb8a99a9c49b8c043f46..4818188774c2ceedb53266e54367e371251e51ac 100644
--- a/examples/transport-equation/transportEquationProblemEoc_impl.h
+++ b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h
@@ -72,7 +72,7 @@ setup( const Config::ParameterContainer& parameters,
    static const int Dimension = Mesh::getMeshDimension();
    typedef typename MeshPointer::ObjectType MeshType;
    typedef Functions::MeshFunction< MeshType > MeshFunction;
-   SharedPointer< MeshFunction > u( this->getMesh() );
+   Pointers::SharedPointer< MeshFunction > u( this->getMesh() );
    if( initialCondition == "heaviside-vector-norm" )
    {
       typedef Functions::Analytic::VectorNorm< Dimension, RealType > VectorNormType;
@@ -83,7 +83,7 @@ setup( const Config::ParameterContainer& parameters,
       {      
          typedef Operators::Analytic::Shift< Dimension, RealType > ShiftOperatorType;
          typedef Functions::OperatorFunction< ShiftOperatorType, InitialConditionType > ExactSolutionType;
-         SharedPointer< ExactSolutionType, Devices::Host > exactSolution;
+         Pointers::SharedPointer<  ExactSolutionType, Devices::Host > exactSolution;
          if( ! exactSolution->getFunction().setup( parameters, prefix + "vector-norm-" ) ||
              ! exactSolution->getOperator().setup( parameters, prefix + "heaviside-" ) )
             return false;
diff --git a/examples/transport-equation/transportEquationProblem_impl.h b/src/Examples/transport-equation/transportEquationProblem_impl.h
similarity index 92%
rename from examples/transport-equation/transportEquationProblem_impl.h
rename to src/Examples/transport-equation/transportEquationProblem_impl.h
index 2c54d8c8549864059466e6aaf305482169b909a3..2d019602cc69f4703ac8caa5d81d8ad9d26e7275 100644
--- a/examples/transport-equation/transportEquationProblem_impl.h
+++ b/src/Examples/transport-equation/transportEquationProblem_impl.h
@@ -137,7 +137,7 @@ setupLinearSystem( Matrix& matrix )
 {
    /*const IndexType dofs = this->getDofs();
    typedef typename Matrix::ObjectType::CompressedRowsLengthsVector CompressedRowsLengthsVectorType;
-   SharedPointer< CompressedRowsLengthsVectorType > rowLengths;
+   Pointers::SharedPointer<  CompressedRowsLengthsVectorType > rowLengths;
    if( ! rowLengths->setSize( dofs ) )
       return false;
    Matrices::MatrixSetter< MeshType, DifferentialOperator, BoundaryCondition, CompressedRowsLengthsVectorType > matrixSetter;
@@ -199,8 +199,8 @@ getExplicitUpdate( const RealType& time,
    int count = ::sqrt(mesh->template getEntitiesCount< Cell >());
    this->bindDofs( _u );
    Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater;
-   SharedPointer< MeshFunctionType > u( mesh, _u ); 
-   SharedPointer< MeshFunctionType > fu( mesh, _fu );
+   Pointers::SharedPointer<  MeshFunctionType > u( mesh, _u ); 
+   Pointers::SharedPointer<  MeshFunctionType > fu( mesh, _fu );
    differentialOperatorPointer->setTau(tau); 
    differentialOperatorPointer->setVelocityField( this->velocityField );
    explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer );
@@ -209,6 +209,20 @@ getExplicitUpdate( const RealType& time,
    explicitUpdater.template update< typename Mesh::Cell, Communicator >( time, tau, mesh, u, fu );
 }
 
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename Communicator,
+          typename DifferentialOperator >
+void
+transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+applyBoundaryConditions( const RealType& time,
+                         DofVectorPointer& dofs )
+{
+   this->bindDofs( dofs );
+   TNL_ASSERT( false, "TODO: implement BC ... see HeatEquationProblem" );   
+}
+
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
diff --git a/src/Python/pytnl/tnl/Array.h b/src/Python/pytnl/tnl/Array.h
index 0d74acdccef35773616fb6b7e695caa32c767361..c90ac44ad0c5911e9cd1d64ef3f2141892e17ecd 100644
--- a/src/Python/pytnl/tnl/Array.h
+++ b/src/Python/pytnl/tnl/Array.h
@@ -45,15 +45,15 @@ void export_Array(py::module & m, const char* name)
                 // Pointer to buffer
                 a.getData(),
                 // Size of one scalar
-                sizeof( typename ArrayType::ElementType ),
+                sizeof( typename ArrayType::ValueType ),
                 // Python struct-style format descriptor
-                py::format_descriptor< typename ArrayType::ElementType >::format(),
+                py::format_descriptor< typename ArrayType::ValueType >::format(),
                 // Number of dimensions
                 1,
                 // Buffer dimensions
                 { a.getSize() },
                 // Strides (in bytes) for each index
-                { sizeof( typename ArrayType::ElementType ) }
+                { sizeof( typename ArrayType::ValueType ) }
             );
         })
     ;
diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index 5d278ae8a17fc204f51b39652cc5dfe735c28dca..7f8aa040478eec2b3628d7fab6262c415d083ec0 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -99,7 +99,7 @@ void export_Matrix( py::module & m, const char* name )
         // TODO: these two don't work
         //.def("addMatrix",           &Matrix::addMatrix)
         //.def("getTransposition",    &Matrix::getTransposition)
-        .def("performSORIteration", &Matrix::template performSORIteration< VectorType >)
+        .def("performSORIteration", &Matrix::template performSORIteration< VectorType, VectorType >)
 //        .def("assign",              &Matrix::operator=)
         .def("assign", []( Matrix& matrix, const Matrix& other ) -> Matrix& {
                 return matrix = other;
diff --git a/src/Python/pytnl/tnl_indexing.h b/src/Python/pytnl/tnl_indexing.h
index 795226537dd4caf98f64ec0e478e511b814ad16e..33b14747d4bd92073d21855650be55099a2e8428 100644
--- a/src/Python/pytnl/tnl_indexing.h
+++ b/src/Python/pytnl/tnl_indexing.h
@@ -9,15 +9,15 @@ template< typename Array, typename Scope >
 void tnl_indexing( Scope & scope )
 {
     using Index = typename Array::IndexType;
-    using Element = typename Array::ElementType;
+    using Value = typename Array::ValueType;
 
     scope.def("__len__", &Array::getSize);
 
     scope.def("__iter__",
         []( Array& array ) {
             return py::make_iterator(
-                        RawIterator<Element>(array.getData()),
-                        RawIterator<Element>(array.getData() + array.getSize()) );
+                        RawIterator<Value>(array.getData()),
+                        RawIterator<Value>(array.getData() + array.getSize()) );
         },
         py::keep_alive<0, 1>()  // keep array alive while iterator is used
     );
@@ -31,7 +31,7 @@ void tnl_indexing( Scope & scope )
     );
 
     scope.def("__setitem__",
-        [](Array &a, Index i, const Element& e) {
+        [](Array &a, Index i, const Value& e) {
             if (i >= a.getSize())
                 throw py::index_error();
             a[i] = e;
diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f0defe5e996d1d01d64bc5ec237a6bf1e47e96c
--- /dev/null
+++ b/src/TNL/Atomic.h
@@ -0,0 +1,348 @@
+/***************************************************************************
+                          Atomic.h  -  description
+                             -------------------
+    begin                : Sep 14, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#include <atomic>  // std::atomic
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/param-types.h>
+
+namespace TNL {
+
+template< typename T, typename Device >
+class Atomic
+{};
+
+template< typename T >
+class Atomic< T, Devices::Host >
+: public std::atomic< T >
+{
+public:
+   Atomic() noexcept = default;
+
+   // inherit constructors
+   using std::atomic< T >::atomic;
+
+   // NOTE: std::atomic is not copyable (see https://stackoverflow.com/a/15250851 for
+   // an explanation), but we need copyability for TNL::Containers::Array. Note that
+   // this copy-constructor and copy-assignment operator are not atomic as they
+   // synchronize only with respect to one or the other object.
+   Atomic( const Atomic& desired ) noexcept
+   {
+      this->store(desired.load());
+   }
+   Atomic& operator=( const Atomic& desired ) noexcept
+   {
+      this->store(desired.load());
+      return *this;
+   }
+
+   // just for compatibility with TNL::Containers::Array...
+   static String getType()
+   {
+      return "Atomic< " +
+             TNL::getType< T >() + ", " +
+             Devices::Host::getDeviceType() + " >";
+   }
+
+   // CAS loops for updating maximum and minimum
+   // reference: https://stackoverflow.com/a/16190791
+   T fetch_max( T value ) noexcept
+   {
+      // a failed CAS refreshes prev_value, so it always holds the value preceding the update
+      T prev_value = *this;
+      while(prev_value < value &&
+            ! this->compare_exchange_weak(prev_value, value))
+         ;
+      return prev_value;
+   }
+
+   T fetch_min( T value ) noexcept
+   {
+      // a failed CAS refreshes prev_value, so it always holds the value preceding the update
+      T prev_value = *this;
+      while(prev_value > value &&
+            ! this->compare_exchange_weak(prev_value, value))
+         ;
+      return prev_value;
+   }
+};
+
+template< typename T >
+class Atomic< T, Devices::Cuda >
+{
+public:
+   using value_type = T;
+   // FIXME
+//   using difference_type = typename std::atomic< T >::difference_type;
+
+   __cuda_callable__
+   Atomic() noexcept = default;
+
+   __cuda_callable__
+   constexpr Atomic( T desired ) noexcept : value(desired) {}
+
+   __cuda_callable__
+   T operator=( T desired ) noexcept
+   {
+      store( desired );
+      return desired;
+   }
+
+   // NOTE: std::atomic is not copyable (see https://stackoverflow.com/a/15250851 for
+   // an explanation), but we need copyability for TNL::Containers::Array. Note that
+   // this copy-constructor and copy-assignment operator are not atomic as they
+   // synchronize only with respect to one or the other object.
+   __cuda_callable__
+   Atomic( const Atomic& desired ) noexcept
+   {
+      // FIXME
+//      *this = desired.load();
+      *this = desired.value;
+   }
+   __cuda_callable__
+   Atomic& operator=( const Atomic& desired ) noexcept
+   {
+      // FIXME
+//      *this = desired.load();
+      *this = desired.value;
+      return *this;
+   }
+
+   // just for compatibility with TNL::Containers::Array (reports the Cuda device type)
+   static String getType()
+   {
+      return "Atomic< " +
+             TNL::getType< T >() + ", " +
+             Devices::Cuda::getDeviceType() + " >";
+   }
+
+   bool is_lock_free() const noexcept
+   {
+      return true;
+   }
+
+   constexpr bool is_always_lock_free() const noexcept
+   {
+      return true;
+   }
+
+   __cuda_callable__
+   void store( T desired ) noexcept
+   {
+      // CUDA does not have a native atomic store, but it can be emulated with atomic exchange
+      exchange( desired );
+   }
+
+   __cuda_callable__
+   T load() const noexcept
+   {
+      // CUDA does not have a native atomic load:
+      // https://stackoverflow.com/questions/32341081/how-to-have-atomic-load-in-cuda
+      return const_cast<Atomic*>(this)->fetch_add( 0 );
+   }
+
+   __cuda_callable__
+   operator T() const noexcept
+   {
+      return load();
+   }
+
+   __cuda_callable__
+   T exchange( T desired ) noexcept
+   {
+#ifdef __CUDA_ARCH__
+      return atomicExch( &value, desired );
+#else
+      const T old = value;
+      value = desired;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   bool compare_exchange_weak( T& expected, T desired ) noexcept
+   {
+      return compare_exchange_strong( expected, desired );
+   }
+
+   __cuda_callable__
+   bool compare_exchange_strong( T& expected, T desired ) noexcept
+   {
+#ifdef __CUDA_ARCH__
+      const T old = atomicCAS( &value, expected, desired );
+      const bool result = old == expected;
+      expected = old;
+      return result;
+#else
+      if( value == expected ) {
+         value = desired;
+         return true;
+      }
+      else {
+         expected = value;
+         return false;
+      }
+#endif
+   }
+
+   __cuda_callable__
+   T fetch_add( T arg )
+   {
+#ifdef __CUDA_ARCH__
+      return atomicAdd( &value, arg );
+#else
+      const T old = value;
+      value += arg;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   T fetch_sub( T arg )
+   {
+#ifdef __CUDA_ARCH__
+      return atomicSub( &value, arg );
+#else
+      const T old = value;
+      value -= arg;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   T fetch_and( T arg )
+   {
+#ifdef __CUDA_ARCH__
+      return atomicAnd( &value, arg );
+#else
+      const T old = value;
+      value = value & arg;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   T fetch_or( T arg )
+   {
+#ifdef __CUDA_ARCH__
+      return atomicOr( &value, arg );
+#else
+      const T old = value;
+      value = value | arg;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   T fetch_xor( T arg )
+   {
+#ifdef __CUDA_ARCH__
+      return atomicXor( &value, arg );
+#else
+      const T old = value;
+      value = value ^ arg;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   T operator+=( T arg ) noexcept
+   {
+      return fetch_add( arg ) + arg;
+   }
+
+   __cuda_callable__
+   T operator-=( T arg ) noexcept
+   {
+      return fetch_sub( arg ) - arg;
+   }
+
+   __cuda_callable__
+   T operator&=( T arg ) noexcept
+   {
+      return fetch_and( arg ) & arg;
+   }
+
+   __cuda_callable__
+   T operator|=( T arg ) noexcept
+   {
+      return fetch_or( arg ) | arg;
+   }
+
+   __cuda_callable__
+   T operator^=( T arg ) noexcept
+   {
+      return fetch_xor( arg ) ^ arg;
+   }
+
+   // pre-increment
+   __cuda_callable__
+   T operator++() noexcept
+   {
+      return fetch_add(1) + 1;
+   }
+
+   // post-increment
+   __cuda_callable__
+   T operator++(int) noexcept
+   {
+      return fetch_add(1);
+   }
+
+   // pre-decrement
+   __cuda_callable__
+   T operator--() noexcept
+   {
+      return fetch_sub(1) - 1;
+   }
+
+   // post-decrement
+   __cuda_callable__
+   T operator--(int) noexcept
+   {
+      return fetch_sub(1);
+   }
+
+   // extensions (methods not present in C++ standards)
+
+   __cuda_callable__
+   T fetch_max( T arg ) noexcept
+   {
+#ifdef __CUDA_ARCH__
+      return atomicMax( &value, arg );
+#else
+      const T old = value;
+      value = ( value > arg ) ? value : arg;
+      return old;
+#endif
+   }
+
+   __cuda_callable__
+   T fetch_min( T arg ) noexcept
+   {
+#ifdef __CUDA_ARCH__
+      return atomicMin( &value, arg );
+#else
+      const T old = value;
+      value = ( value < arg ) ? value : arg;
+      return old;
+#endif
+   }
+
+protected:
+   T value;
+};
+
+} // namespace TNL
diff --git a/src/TNL/CMakeLists.txt b/src/TNL/CMakeLists.txt
index 37a16a1ab25d98c2c648eb83b050e4ebcbebfd96..cd07ae65910ab69bf004f69bb84231caf78aaab3 100644
--- a/src/TNL/CMakeLists.txt
+++ b/src/TNL/CMakeLists.txt
@@ -10,20 +10,19 @@ ADD_SUBDIRECTORY( Images )
 ADD_SUBDIRECTORY( Matrices )
 ADD_SUBDIRECTORY( Meshes )
 ADD_SUBDIRECTORY( Operators )
+ADD_SUBDIRECTORY( Pointers )
 ADD_SUBDIRECTORY( Problems )
 ADD_SUBDIRECTORY( Solvers )
 
-#ADD_SUBDIRECTORY( core )
 ADD_SUBDIRECTORY( legacy )
 
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL )
 
 set( headers
+     Atomic.h
      Assert.h
-     Constants.h
      CudaSharedMemory.h
      CudaStreamPool.h
-     DevicePointer.h
      File.h
      File_impl.h
      FileName.h
@@ -33,13 +32,9 @@ set( headers
      Math.h
      ParallelFor.h
      param-types.h
-     SharedPointer.h
-     SmartPointer.h
-     SmartPointersRegister.h
      StaticFor.h
      String.h
      Timer.h
-     UniquePointer.h
      StaticVectorFor.h )
 
 set( common_SOURCES
@@ -47,7 +42,6 @@ set( common_SOURCES
      FileName.cpp
      Object.cpp
      Logger.cpp
-     SmartPointersRegister.cpp
      String.cpp
      Timer.cpp )
 
@@ -60,9 +54,9 @@ set( tnl_SOURCES ${tnl_config_SOURCES}
                  ${tnl_matrices_SOURCES}
                  ${tnl_operators_SOURCES}
                  ${tnl_problems_SOURCES}
+                 ${tnl_pointers_SOURCES}
                  ${tnl_solvers_SOURCES}
 
-                 ${tnl_core_SOURCES}
                  ${tnl_legacy_SOURCES}
 
                  ${common_SOURCES} )
@@ -75,11 +69,10 @@ set( tnl_CUDA__SOURCES ${tnl_config_CUDA__SOURCES}
                        ${tnl_images_CUDA__SOURCES}
                        ${tnl_matrices_CUDA__SOURCES}
                        ${tnl_operators_CUDA__SOURCES}
+                       ${tnl_pointers_CUDA__SOURCES}
                        ${tnl_problems_CUDA__SOURCES}
                        ${tnl_solvers_CUDA__SOURCES}
 
-
-                       ${tnl_core_CUDA__SOURCES}
                        ${tnl_legacy_CUDA__SOURCES}
                        ${common_SOURCES} )
 
@@ -107,31 +100,32 @@ TARGET_LINK_LIBRARIES( tnl
 
 INSTALL( TARGETS tnl DESTINATION lib )
 
-IF( BUILD_MPI )
-
-   ADD_LIBRARY( tnl-mpi_static STATIC ${tnl_SOURCES} )
-   INSTALL( TARGETS tnl-mpi_static DESTINATION lib )
-
-   if( BUILD_CUDA )
-      CUDA_ADD_LIBRARY( tnl-mpi SHARED ${tnl_CUDA__SOURCES}
-                        OPTIONS ${CUDA_ADD_LIBRARY_OPTIONS} )
-      # the static library with CUDA support has to be built separately
-      CUDA_ADD_LIBRARY( tnl-mpi-cuda_static STATIC ${tnl_CUDA__SOURCES} )
-      INSTALL( TARGETS tnl-mpi-cuda_static DESTINATION lib )
-   else( BUILD_CUDA )
-      ADD_LIBRARY( tnl-mpi SHARED ${tnl_SOURCES} )
-   endif( BUILD_CUDA )
-
-   SET_TARGET_PROPERTIES( tnl-mpi PROPERTIES
-                          VERSION ${tnlVersion} )
-#   SET_TARGET_PROPERTIES( tnl-mpi
-#                          LINK_INTERFACE_LIBRARIES "")
-
-
-   TARGET_LINK_LIBRARIES( tnl-mpi
-                          ${MPI_LIBRARIES} )
-   INSTALL( TARGETS tnl-mpi DESTINATION lib )
-
-endif()
+# NOTE: this is not necessary until something in the library file actually depends on MPI
+#IF( BUILD_MPI )
+#
+#   ADD_LIBRARY( tnl-mpi_static STATIC ${tnl_SOURCES} )
+#   INSTALL( TARGETS tnl-mpi_static DESTINATION lib )
+#
+#   if( BUILD_CUDA )
+#      CUDA_ADD_LIBRARY( tnl-mpi SHARED ${tnl_CUDA__SOURCES}
+#                        OPTIONS ${CUDA_ADD_LIBRARY_OPTIONS} )
+#      # the static library with CUDA support has to be built separately
+#      CUDA_ADD_LIBRARY( tnl-mpi-cuda_static STATIC ${tnl_CUDA__SOURCES} )
+#      INSTALL( TARGETS tnl-mpi-cuda_static DESTINATION lib )
+#   else( BUILD_CUDA )
+#      ADD_LIBRARY( tnl-mpi SHARED ${tnl_SOURCES} )
+#   endif( BUILD_CUDA )
+#
+#   SET_TARGET_PROPERTIES( tnl-mpi PROPERTIES
+#                          VERSION ${tnlVersion} )
+##   SET_TARGET_PROPERTIES( tnl-mpi
+##                          LINK_INTERFACE_LIBRARIES "")
+#
+#
+#   TARGET_LINK_LIBRARIES( tnl-mpi
+#                          ${MPI_LIBRARIES} )
+#   INSTALL( TARGETS tnl-mpi DESTINATION lib )
+#
+#endif()
 
 INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY} )
diff --git a/src/TNL/Communicators/CMakeLists.txt b/src/TNL/Communicators/CMakeLists.txt
index 7a58eaa2a04efe42bc444b09e28b25c51fc5d7e4..fb3193b739c75ea41ddae9ecf664592e44821d79 100644
--- a/src/TNL/Communicators/CMakeLists.txt
+++ b/src/TNL/Communicators/CMakeLists.txt
@@ -1,6 +1,7 @@
 SET( headers MpiCommunicator.h
              MpiDefs.h             
              NoDistrCommunicator.h 
+             ScopedInitializer.h
     )
 
 INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Communicators )
diff --git a/src/TNL/Communicators/MpiCommunicator.h b/src/TNL/Communicators/MpiCommunicator.h
index 1d34160d4d6ecaecbefb067d1838e4a83b29aeef..c233004a602f31ce8b7220b9983c9541f47f6331 100644
--- a/src/TNL/Communicators/MpiCommunicator.h
+++ b/src/TNL/Communicators/MpiCommunicator.h
@@ -16,15 +16,17 @@
 
 #ifdef HAVE_MPI
 #include <mpi.h>
-#include <mpi-ext.h>
+#ifdef OMPI_MAJOR_VERSION
+   // header specific to OpenMPI (needed for CUDA-aware detection)
+   #include <mpi-ext.h>
+#endif
 
 #ifdef HAVE_CUDA
     #include <TNL/Devices/Cuda.h>
 
     typedef struct __attribute__((__packed__))  {
-	    char name[MPI_MAX_PROCESSOR_NAME];
+       char name[MPI_MAX_PROCESSOR_NAME];
     } procName;
-
 #endif
 
 #endif
@@ -58,7 +60,13 @@ class MpiCommunicator
       inline static MPI_Datatype MPIDataType( const double* ) { return MPI_DOUBLE; };
       inline static MPI_Datatype MPIDataType( const long double* ) { return MPI_LONG_DOUBLE; };
 
-      // TODO: How to deal with bool
+      // bool maps to MPI_C_BOOL: MPI_CHAR is not allowed in reductions such as MPI_LOR/MPI_LAND (TODO: unit tests)
+      inline static MPI_Datatype MPIDataType( const bool* )
+      {
+         // sizeof(bool) is implementation-defined: https://stackoverflow.com/a/4897859
+         static_assert( sizeof(bool) == 1, "The programmer did not count with systems where sizeof(bool) != 1." );
+         return MPI_C_BOOL;
+      };
 
       using Request = MPI_Request;
       using CommunicationGroup = MPI_Comm;
@@ -69,7 +77,11 @@ class MpiCommunicator
 
       static bool isDistributed()
       {
+#ifdef HAVE_MPI
          return GetSize(AllGroup)>1;
+#else
+         return false;
+#endif
       }
 
       static void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
@@ -88,7 +100,7 @@ class MpiCommunicator
          if(IsInitialized())//i.e. - isUsed
          {
             redirect = parameters.getParameter< bool >( "redirect-mpi-output" );
-            setupRedirection();                        
+            setupRedirection();
 #ifdef HAVE_CUDA
    #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
             std::cout << "CUDA-aware MPI detected on this system ... " << std::endl;
@@ -100,13 +112,13 @@ class MpiCommunicator
    #endif
 #endif // HAVE_CUDA
             bool gdbDebug = parameters.getParameter< bool >( "mpi-gdb-debug" );
-            int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );            
-    
+            int processToAttach = parameters.getParameter< int >( "mpi-process-to-attach" );
+
             if( gdbDebug )
             {
                int rank = GetRank( MPI_COMM_WORLD );
                int pid = getpid();
-                              
+
                volatile int tnlMPIDebugAttached = 0;
                MPI_Send( &pid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD );
                MPI_Barrier( MPI_COMM_WORLD );
@@ -121,8 +133,8 @@ class MpiCommunicator
 
                      if( i == processToAttach || processToAttach == -1 )
                      {
-                        std::cout << "  For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\"" 
-                                  << " -ex \"set variable tnlMPIDebugAttached=1\"" 
+                        std::cout << "  For MPI process " << i << ": gdb -q -ex \"attach " << recvPid << "\""
+                                  << " -ex \"set variable tnlMPIDebugAttached=1\""
                                   << " -ex \"finish\"" << std::endl;
                      }
                   }
@@ -137,14 +149,10 @@ class MpiCommunicator
          return true;
       }
 
-      static void Init(int argc, char **argv )
+      static void Init(int& argc, char**& argv )
       {
 #ifdef HAVE_MPI
          MPI_Init( &argc, &argv );
-         NullRequest=MPI_REQUEST_NULL;
-         AllGroup=MPI_COMM_WORLD;
-         redirect = true;
-
          selectGPU();
 #endif
       }
@@ -165,14 +173,15 @@ class MpiCommunicator
             //redirect output to files...
             if(GetRank(AllGroup)!=0)
             {
-               std::cout<< GetRank(AllGroup) <<": Redirecting std::out to file" <<std::endl;
-               String stdoutFile;
-               stdoutFile=String( "./stdout-")+convertToString(GetRank(MPI_COMM_WORLD))+String(".txt");
-               filestr.open (stdoutFile.getString()); 
-               psbuf = filestr.rdbuf(); 
+               std::cout << GetRank(AllGroup) << ": Redirecting std::cout to file" << std::endl;
+               const String stdoutFile = String("./stdout-") + convertToString(GetRank(AllGroup)) + String(".txt");
+               filestr.open(stdoutFile.getString());
+               psbuf = filestr.rdbuf();
                std::cout.rdbuf(psbuf);
             }
          }
+#else
+         throw Exceptions::MPISupportMissing();
 #endif
       }
 
@@ -194,12 +203,12 @@ class MpiCommunicator
       static bool IsInitialized()
       {
 #ifdef HAVE_MPI
-         int inicialized, finalized;
-         MPI_Initialized(&inicialized);
+         int initialized, finalized;
+         MPI_Initialized(&initialized);
          MPI_Finalized(&finalized);
-         return inicialized && !finalized;
+         return initialized && !finalized;
 #else
-        return false;
+         throw Exceptions::MPISupportMissing();
 #endif
       }
 
@@ -207,23 +216,25 @@ class MpiCommunicator
       {
 #ifdef HAVE_MPI
         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
+        TNL_ASSERT_NE(group, NullGroup, "GetRank cannot be called with NullGroup");
         int rank;
         MPI_Comm_rank(group,&rank);
         return rank;
 #else
-        return 1;
+         throw Exceptions::MPISupportMissing();
 #endif
       }
 
       static int GetSize(CommunicationGroup group)
       {
 #ifdef HAVE_MPI
-        TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
-        int size;
-        MPI_Comm_size(group,&size);
-        return size;
+         TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
+         TNL_ASSERT_NE(group, NullGroup, "GetSize cannot be called with NullGroup");
+         int size;
+         MPI_Comm_size(group,&size);
+         return size;
 #else
-        return 1;
+         throw Exceptions::MPISupportMissing();
 #endif
       }
 
@@ -248,26 +259,30 @@ class MpiCommunicator
             /***END OF HACK***/
 
             MPI_Dims_create(nproc, dim, distr);
+#else
+            throw Exceptions::MPISupportMissing();
 #endif
         }
 
-         static void Barrier(CommunicationGroup comm)
+         static void Barrier(CommunicationGroup group)
          {
 #ifdef HAVE_MPI
-            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not inicialized");
-            MPI_Barrier(comm);
+            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
+            TNL_ASSERT_NE(group, NullGroup, "Barrier cannot be called with NullGroup");
+            MPI_Barrier(group);
 #else
             throw Exceptions::MPISupportMissing();
 #endif
         }
 
          template <typename T>
-         static Request ISend( const T *data, int count, int dest, CommunicationGroup group)
+         static Request ISend( const T* data, int count, int dest, CommunicationGroup group)
          {
 #ifdef HAVE_MPI
-            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not inicialized");
+            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
+            TNL_ASSERT_NE(group, NullGroup, "ISend cannot be called with NullGroup");
             Request req;
-            MPI_Isend((void*) data, count, MPIDataType(data) , dest, 0, group, &req);
+            MPI_Isend((const void*) data, count, MPIDataType(data) , dest, 0, group, &req);
             return req;
 #else
             throw Exceptions::MPISupportMissing();
@@ -275,10 +290,11 @@ class MpiCommunicator
         }
 
          template <typename T>
-         static Request IRecv( const T *data, int count, int src, CommunicationGroup group)
+         static Request IRecv( T* data, int count, int src, CommunicationGroup group)
          {
 #ifdef HAVE_MPI
-            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not inicialized");
+            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
+            TNL_ASSERT_NE(group, NullGroup, "IRecv cannot be called with NullGroup");
             Request req;
             MPI_Irecv((void*) data, count, MPIDataType(data) , src, 0, group, &req);
             return req;
@@ -290,33 +306,35 @@ class MpiCommunicator
          static void WaitAll(Request *reqs, int length)
          {
 #ifdef HAVE_MPI
-            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not inicialized");
+            TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
             MPI_Waitall(length, reqs, MPI_STATUSES_IGNORE);
 #else
             throw Exceptions::MPISupportMissing();
 #endif
         }
 
-        template< typename T > 
+        template< typename T >
         static void Bcast(  T& data, int count, int root,CommunicationGroup group)
         {
 #ifdef HAVE_MPI
-        TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not inicialized");
-        MPI_Bcast((void*) &data, count,  MPIDataType(data), root, group);
+           TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized");
+           TNL_ASSERT_NE(group, NullGroup, "BCast cannot be called with NullGroup");
+           MPI_Bcast((void*) &data, count,  MPIDataType(data), root, group);
 #else
-        throw Exceptions::MPISupportMissing();
+           throw Exceptions::MPISupportMissing();
 #endif
         }
 
         template< typename T >
-        static void Allreduce( T* data,
+        static void Allreduce( const T* data,
                                T* reduced_data,
                                int count,
                                const MPI_Op &op,
                                CommunicationGroup group)
         {
 #ifdef HAVE_MPI
-            MPI_Allreduce( (void*) data, (void*) reduced_data,count,MPIDataType(data),op,group);
+            TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup");
+            MPI_Allreduce( (const void*) data, (void*) reduced_data,count,MPIDataType(data),op,group);
 #else
             throw Exceptions::MPISupportMissing();
 #endif
@@ -324,7 +342,7 @@ class MpiCommunicator
 
 
          template< typename T >
-         static void Reduce( T* data,
+         static void Reduce( const T* data,
                     T* reduced_data,
                     int count,
                     MPI_Op &op,
@@ -332,14 +350,15 @@ class MpiCommunicator
                     CommunicationGroup group)
          {
 #ifdef HAVE_MPI
-            MPI_Reduce( (void*) data, (void*) reduced_data,count,MPIDataType(data),op,root,group);
+            TNL_ASSERT_NE(group, NullGroup, "Reduce cannot be called with NullGroup");
+            MPI_Reduce( (const void*) data, (void*) reduced_data,count,MPIDataType(data),op,root,group);
 #else
             throw Exceptions::MPISupportMissing();
 #endif
         }
-         
+
          template< typename T >
-         static void SendReceive( T* sendData,
+         static void SendReceive( const T* sendData,
                                   int sendCount,
                                   int destination,
                                   int sendTag,
@@ -350,8 +369,9 @@ class MpiCommunicator
                                   CommunicationGroup group )
          {
 #ifdef HAVE_MPI
+            TNL_ASSERT_NE(group, NullGroup, "SendReceive cannot be called with NullGroup");
             MPI_Status status;
-            MPI_Sendrecv( ( void* ) sendData,
+            MPI_Sendrecv( ( const void* ) sendData,
                           sendCount,
                           MPIDataType( sendData ),
                           destination,
@@ -365,11 +385,32 @@ class MpiCommunicator
                           &status );
 #else
             throw Exceptions::MPISupportMissing();
-#endif            
+#endif
+         }
+
+         template< typename T >
+         static void Alltoall( const T* sendData,
+                               int sendCount,
+                               T* receiveData,
+                               int receiveCount,
+                               CommunicationGroup group )
+         {
+#ifdef HAVE_MPI
+            TNL_ASSERT_NE(group, NullGroup, "Alltoall cannot be called with NullGroup");
+            MPI_Alltoall( ( const void* ) sendData,
+                          sendCount,
+                          MPIDataType( sendData ),
+                          ( void* ) receiveData,
+                          receiveCount,
+                          MPIDataType( receiveData ),
+                          group );
+#else
+            throw Exceptions::MPISupportMissing();
+#endif
          }
 
 
-      static void writeProlog( Logger& logger ) 
+      static void writeProlog( Logger& logger )
       {
          if( isDistributed() )
          {
@@ -389,32 +430,32 @@ class MpiCommunicator
             MPI_Comm_split(oldGroup, MPI_UNDEFINED, GetRank(oldGroup), &newGroup);
         }
 #else
-        newGroup=oldGroup;
-#endif         
+         throw Exceptions::MPISupportMissing();
+#endif
       }
 
 #ifdef HAVE_MPI
       static MPI_Request NullRequest;
       static MPI_Comm AllGroup;
+      static MPI_Comm NullGroup;
 #else
-      static int NullRequest;
-      static int AllGroup;
+      static constexpr int NullRequest = -1;
+      static constexpr int AllGroup = 1;
+      static constexpr int NullGroup = 0;
 #endif
     private :
-      static std::streambuf *psbuf;
-      static std::streambuf *backup;
+      static std::streambuf* psbuf;
+      static std::streambuf* backup;
       static std::ofstream filestr;
       static bool redirect;
-      static bool inited;
 
       static void selectGPU(void)
       {
 #ifdef HAVE_MPI
     #ifdef HAVE_CUDA
-        	int count,rank, gpuCount, gpuNumber;
-         MPI_Comm_size(MPI_COMM_WORLD,&count);
-         MPI_Comm_rank(MPI_COMM_WORLD,&rank);
-
+         const int count = GetSize(AllGroup);
+         const int rank = GetRank(AllGroup);
+         int gpuCount;
          cudaGetDeviceCount(&gpuCount);
 
          procName names[count];
@@ -437,7 +478,7 @@ class MpiCommunicator
                nodeRank++;
          }
 
-         gpuNumber=nodeRank % gpuCount;
+         const int gpuNumber = nodeRank % gpuCount;
 
          cudaSetDevice(gpuNumber);
          TNL_CHECK_CUDA_DEVICE;
@@ -452,23 +493,20 @@ class MpiCommunicator
 };
 
 #ifdef HAVE_MPI
-MPI_Request MpiCommunicator::NullRequest;
-MPI_Comm MpiCommunicator::AllGroup;
-#else
-int MpiCommunicator::NullRequest;
-int MpiCommunicator::AllGroup;
+MPI_Request MpiCommunicator::NullRequest = MPI_REQUEST_NULL;
+MPI_Comm MpiCommunicator::AllGroup = MPI_COMM_WORLD;
+MPI_Comm MpiCommunicator::NullGroup = MPI_COMM_NULL;
 #endif
-std::streambuf *MpiCommunicator::psbuf;
-std::streambuf *MpiCommunicator::backup;
+std::streambuf* MpiCommunicator::psbuf = nullptr;
+std::streambuf* MpiCommunicator::backup = nullptr;
 std::ofstream MpiCommunicator::filestr;
-bool MpiCommunicator::redirect;
-bool MpiCommunicator::inited;
+bool MpiCommunicator::redirect = true;
 
 #ifdef HAVE_MPI
+// TODO: this duplicates MpiCommunicator::MPIDataType
 template<typename Type>
-class MPITypeResolver
+struct MPITypeResolver
 {
-    public:
     static inline MPI_Datatype getType()
     {
         TNL_ASSERT_TRUE(false, "Fatal Error - Unknown MPI Type");
@@ -476,54 +514,54 @@ class MPITypeResolver
     };
 };
 
-template<> class MPITypeResolver<char>
+template<> struct MPITypeResolver<char>
 {
-    public:static inline MPI_Datatype getType(){return MPI_CHAR;};
+    static inline MPI_Datatype getType(){return MPI_CHAR;};
 };
 
-template<> class MPITypeResolver<short int>
+template<> struct MPITypeResolver<short int>
 {
-    public:static inline MPI_Datatype getType(){return MPI_SHORT;};
+    static inline MPI_Datatype getType(){return MPI_SHORT;};
 };
 
-template<> class MPITypeResolver<long int>
+template<> struct MPITypeResolver<long int>
 {
-    public:static inline MPI_Datatype getType(){return MPI_LONG;};
+    static inline MPI_Datatype getType(){return MPI_LONG;};
 };
 
-template<> class MPITypeResolver<unsigned char>
+template<> struct MPITypeResolver<unsigned char>
 {
-    public:static inline MPI_Datatype getType(){return MPI_UNSIGNED_CHAR;};
+    static inline MPI_Datatype getType(){return MPI_UNSIGNED_CHAR;};
 };
 
-template<> class MPITypeResolver<unsigned short int>
+template<> struct MPITypeResolver<unsigned short int>
 {
-    public:static inline MPI_Datatype getType(){return MPI_UNSIGNED_SHORT;};
+    static inline MPI_Datatype getType(){return MPI_UNSIGNED_SHORT;};
 };
 
-template<> class MPITypeResolver<unsigned int>
+template<> struct MPITypeResolver<unsigned int>
 {
-    public:static inline MPI_Datatype getType(){return MPI_UNSIGNED;};
+    static inline MPI_Datatype getType(){return MPI_UNSIGNED;};
 };
 
-template<> class MPITypeResolver<unsigned long int>
+template<> struct MPITypeResolver<unsigned long int>
 {
-    public:static inline MPI_Datatype getType(){return MPI_UNSIGNED_LONG;};
+    static inline MPI_Datatype getType(){return MPI_UNSIGNED_LONG;};
 };
 
-template<> class MPITypeResolver<float>
+template<> struct MPITypeResolver<float>
 {
-    public:static inline MPI_Datatype getType(){return MPI_FLOAT;};
+    static inline MPI_Datatype getType(){return MPI_FLOAT;};
 };
 
-template<> class MPITypeResolver<double>
+template<> struct MPITypeResolver<double>
 {
-    public:static inline MPI_Datatype getType(){return MPI_DOUBLE;};
+    static inline MPI_Datatype getType(){return MPI_DOUBLE;};
 };
 
-template<> class MPITypeResolver<long double>
+template<> struct MPITypeResolver<long double>
 {
-    public:static inline MPI_Datatype getType(){return MPI_LONG_DOUBLE;};
+    static inline MPI_Datatype getType(){return MPI_LONG_DOUBLE;};
 };
 #endif
 
@@ -540,6 +578,6 @@ for( int j = 0; j < TNL::Communicators::MpiCommunicator::GetSize( TNL::Communica
                    << TNL::Communicators::MpiCommunicator::GetSize( TNL::Communicators::MpiCommunicator::AllGroup )      \
                    << " : " << message << std::endl;                                                                     \
       }                                                                                                                  \
-      TNL::Communicators::MpiCommunicator::Barrier( Communicator::AllGroup );                                            \
+      TNL::Communicators::MpiCommunicator::Barrier( TNL::Communicators::MpiCommunicator::AllGroup );                     \
    }
 
diff --git a/src/TNL/Communicators/MpiDefs.h b/src/TNL/Communicators/MpiDefs.h
index 4202bbfcb4ab2feb8dd2bc74e1da6b8e362cfabf..957354b9d0ea911c4269154486af8e95f4a865a9 100644
--- a/src/TNL/Communicators/MpiDefs.h
+++ b/src/TNL/Communicators/MpiDefs.h
@@ -11,5 +11,18 @@
 #pragma once
 
 #ifndef HAVE_MPI
-enum MPI_Op { MPI_SUM, MPI_MAX };
-#endif
\ No newline at end of file
+enum MPI_Op {  // fallback declaration of MPI_Op, compiled only when HAVE_MPI is not defined
+   MPI_MAX,
+   MPI_MIN,
+   MPI_SUM,
+   MPI_PROD,
+   MPI_LAND,
+   MPI_BAND,
+   MPI_LOR,
+   MPI_BOR,
+   MPI_LXOR,
+   MPI_BXOR,
+   MPI_MINLOC,
+   MPI_MAXLOC,
+};  // NOTE(review): enumerator names appear to mirror MPI's predefined reduction operations - confirm
+#endif
diff --git a/src/TNL/Communicators/NoDistrCommunicator.h b/src/TNL/Communicators/NoDistrCommunicator.h
index 9f0bf96187af7c0c096246144507d22f11af3bff..aac58b916bf17656e9d6c33bead7a4d37441fca7 100644
--- a/src/TNL/Communicators/NoDistrCommunicator.h
+++ b/src/TNL/Communicators/NoDistrCommunicator.h
@@ -13,10 +13,6 @@
 #include <TNL/Logger.h>
 #include <TNL/Communicators/MpiDefs.h>
 
-#ifdef HAVE_MPI
-#include <mpi.h>
-#endif
-
 namespace TNL {
 namespace Communicators {
 namespace {
@@ -29,8 +25,9 @@ class NoDistrCommunicator
 
       typedef int Request;
       typedef int CommunicationGroup;
-      static Request NullRequest;
-      static CommunicationGroup AllGroup;
+      static constexpr Request NullRequest = -1;
+      static constexpr CommunicationGroup AllGroup = 1;
+      static constexpr CommunicationGroup NullGroup = 0;
 
       static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ){};
  
@@ -40,10 +37,7 @@ class NoDistrCommunicator
          return true;
       }
       
-      static void Init(int argc, char **argv, bool redirect=false)
-      {
-          NullRequest=-1;
-      }
+      static void Init(int& argc, char**& argv) {}
       
       static void setRedirection( bool redirect_ ) {}
       
@@ -105,13 +99,13 @@ class NoDistrCommunicator
       }
 
       template< typename T >
-      static void Allreduce( T* data,
+      static void Allreduce( const T* data,
                              T* reduced_data,
                              int count,
                              const MPI_Op &op,
                              CommunicationGroup group )
       {
-         memcpy( ( void* ) reduced_data, ( void* ) data, count * sizeof( T ) );
+         memcpy( ( void* ) reduced_data, ( const void* ) data, count * sizeof( T ) );
       }
 
       template< typename T >
@@ -125,6 +119,15 @@ class NoDistrCommunicator
          memcpy( ( void* ) reduced_data, ( void* ) data, count * sizeof( T ) );
       }
 
+      template< typename T >  // single-process Alltoall: the only rank delivers its block to itself
+      static void Alltoall( const T* sendData, int sendCount,
+                            T* receiveData, int receiveCount,
+                            CommunicationGroup group )
+      {
+         // receiveCount and group are not used; sendCount elements are copied, as Allreduce does
+         memcpy( ( void* ) receiveData, ( const void* ) sendData, sendCount * sizeof( T ) );
+      }
+
       static void CreateNewGroup(bool meToo, int myRank, CommunicationGroup &oldGroup, CommunicationGroup &newGroup)
       {
          newGroup=oldGroup;
@@ -133,10 +136,6 @@ class NoDistrCommunicator
       static void writeProlog( Logger& logger ){};
 };
 
-
-  int NoDistrCommunicator::NullRequest;
-  int NoDistrCommunicator::AllGroup;
-
 } // namespace <unnamed>
 } // namespace Communicators
 } // namespace TNL
diff --git a/src/TNL/Communicators/ScopedInitializer.h b/src/TNL/Communicators/ScopedInitializer.h
new file mode 100644
index 0000000000000000000000000000000000000000..2970bc628319bdf9d4c40d7a2cb32694a8148f7d
--- /dev/null
+++ b/src/TNL/Communicators/ScopedInitializer.h
@@ -0,0 +1,33 @@
+/***************************************************************************
+                          ScopedInitializer.h  -  description
+                             -------------------
+    begin                : Sep 16, 2018
+    copyright            : (C) 2005 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+namespace TNL {
+namespace Communicators {
+
+// Scope guard: calls Communicator::Init on construction and Communicator::Finalize on destruction.
+template< typename Communicator >
+struct ScopedInitializer
+{
+   ScopedInitializer( int& argc, char**& argv ) { Communicator::Init( argc, argv ); }
+
+   // Non-copyable: destroying a copy would call Communicator::Finalize a second time.
+   ScopedInitializer( const ScopedInitializer& ) = delete;
+   ScopedInitializer& operator=( const ScopedInitializer& ) = delete;
+
+   // Runs when the guard leaves scope.
+   ~ScopedInitializer() { Communicator::Finalize(); }
+};
+
+} // namespace Communicators
+} // namespace TNL
diff --git a/src/TNL/Constants.h b/src/TNL/Constants.h
deleted file mode 100644
index 09a175489cbffc5e800ddfffd37dc967050d0d1c..0000000000000000000000000000000000000000
--- a/src/TNL/Constants.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/***************************************************************************
-                           tnlConstants.h -  description
-                             -------------------
-    begin                : June 17, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <limits.h>
-#include <float.h>
-#include <TNL/Assert.h>
-#include <TNL/Devices/Cuda.h>
-
-namespace TNL {
-
-template< typename T > constexpr T MinValue() { return T();};
-template<> constexpr char               MinValue< char >() { return CHAR_MIN; }
-template<> constexpr unsigned char      MinValue< unsigned char >() { return 0; }
-template<> constexpr short int          MinValue< short int >() { return SHRT_MIN; }
-template<> constexpr unsigned short int MinValue< unsigned short int >() { return 0; }
-template<> constexpr int                MinValue< int >() { return INT_MIN; }
-template<> constexpr unsigned int       MinValue< unsigned int >() { return 0; }
-template<> constexpr long int           MinValue< long int >() { return LONG_MIN; }
-template<> constexpr unsigned long int  MinValue< unsigned long int >() { return 0; }
-template<> constexpr float              MinValue< float >() { return -FLT_MAX; }
-template<> constexpr double             MinValue< double >() { return -DBL_MAX; }
-template<> constexpr long double        MinValue< long double >() { return -LDBL_MAX; }
-
-template< typename T > constexpr T MaxValue() { return T();};
-template<> constexpr char               MaxValue< char >() { return CHAR_MAX; }
-template<> constexpr unsigned char      MaxValue< unsigned char >() { return UCHAR_MAX; }
-template<> constexpr short int          MaxValue< short int >() { return SHRT_MAX; }
-template<> constexpr unsigned short int MaxValue< unsigned short int >() { return USHRT_MAX; }
-template<> constexpr int                MaxValue< int >() { return INT_MAX; }
-template<> constexpr unsigned int       MaxValue< unsigned int >() { return UINT_MAX; }
-template<> constexpr long int           MaxValue< long int >() { return LONG_MAX; }
-template<> constexpr unsigned long int  MaxValue< unsigned long int >() { return ULONG_MAX; }
-template<> constexpr float              MaxValue< float >() { return FLT_MAX; }
-template<> constexpr double             MaxValue< double >() { return DBL_MAX; }
-template<> constexpr long double        MaxValue< long double >() { return LDBL_MAX; }
-
-} // namespace TNL
-
diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Containers/Algorithms/ArrayOperations.h
index c4e4e31d6d9ab0a993029ea447c0c4f475004c20..ad852e10f78e5b4e08d7cf66abf7071f33e5e73e 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperations.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperations.h
@@ -39,7 +39,7 @@ class ArrayOperations< Devices::Host >
                                     const Element& value );
 
       template< typename Element >
-      static Element getMemoryElement( Element* data );
+      static Element getMemoryElement( const Element* data );
 
       template< typename Element, typename Index >
       static bool setMemory( Element* data,
@@ -81,17 +81,15 @@ class ArrayOperations< Devices::Cuda >
       template< typename Element, typename Index >
       static void allocateMemory( Element*& data,
                                   const Index size );
-      
+
       template< typename Element >
       static void freeMemory( Element* data );
 
       template< typename Element >
-      __cuda_callable__
       static void setMemoryElement( Element* data,
                                     const Element& value );
 
       template< typename Element >
-      __cuda_callable__
       static Element getMemoryElement( const Element* data );
 
       template< typename Element, typename Index >
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
index bb5b6c76e8b94b524652d4b811cff937a61de6fa..bca6bdb0479eb38a329234f92305421873864dc5 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
@@ -60,32 +60,24 @@ freeMemory( Element* data )
 }
 
 template< typename Element >
-__cuda_callable__ void
+void
 ArrayOperations< Devices::Cuda >::
 setMemoryElement( Element* data,
                   const Element& value )
 {
    TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
-#ifdef __CUDAARCH__
-   *data = value;
-#else   
    ArrayOperations< Devices::Cuda >::setMemory( data, value, 1 );
-#endif   
 }
 
 template< typename Element >
-__cuda_callable__ Element
+Element
 ArrayOperations< Devices::Cuda >::
 getMemoryElement( const Element* data )
 {
    TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
-#ifdef __CUDAARCH__
-   return *data;
-#else   
    Element result;
    ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< Element, Element, int >( &result, data, 1 );
    return result;
-#endif   
 }
 
 
@@ -209,10 +201,9 @@ containsValue( const Element* data,
    TNL_ASSERT_GE( size, 0, "" );
    if( size == 0 ) return false;
    bool result = false;
-   using Operation = Algorithms::ParallelReductionContainsValue< Element >;
-   Operation reductionContainsValue;
+   Algorithms::ParallelReductionContainsValue< Element > reductionContainsValue;
    reductionContainsValue.setValue( value );
-   Reduction< Devices::Cuda >::template reduce< Operation, Index >( reductionContainsValue, size, data, 0, result );
+   Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, 0, result );
    return result;
 }
 
@@ -228,10 +219,9 @@ containsOnlyValue( const Element* data,
    TNL_ASSERT_GE( size, 0, "" );
    if( size == 0 ) return false;
    bool result = false;
-   using Operation = Algorithms::ParallelReductionContainsOnlyValue< Element >;
-   Operation reductionContainsOnlyValue;
+   Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue;
    reductionContainsOnlyValue.setValue( value );
-   Reduction< Devices::Cuda >::template reduce< Operation, Index >( reductionContainsOnlyValue, size, data, 0, result );
+   Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, 0, result );
    return result;
 }
 
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h
index f2141ae9ef75acf8ad740579ec1918666526491c..756731ca9fe645948aa7c9f8e1634e16ebba9a17 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h
@@ -51,15 +51,15 @@ setMemoryElement( Element* data,
                   const Element& value )
 {
    *data = value;
-};
+}
 
 template< typename Element >
 Element
 ArrayOperations< Devices::Host >::
-getMemoryElement( Element* data )
+getMemoryElement( const Element* data )
 {
    return *data;
-};
+}
 
 template< typename Element, typename Index >
 bool
diff --git a/src/TNL/Containers/Algorithms/CMakeLists.txt b/src/TNL/Containers/Algorithms/CMakeLists.txt
index a8c9bf7699f129c21c4930df090af5c7d5910325..c63837351076bdf4996016d685abf4fdb163033b 100644
--- a/src/TNL/Containers/Algorithms/CMakeLists.txt
+++ b/src/TNL/Containers/Algorithms/CMakeLists.txt
@@ -1,5 +1,3 @@
-ADD_SUBDIRECTORY( TemplateExplicitInstantiation )
-
 set( headers ArrayOperations.h
              ArrayOperationsHost_impl.h
              ArrayOperationsCuda_impl.h
diff --git a/src/TNL/Containers/Algorithms/ReductionOperations.h b/src/TNL/Containers/Algorithms/ReductionOperations.h
index d3e58f55eaaf997a33ab8f86725486eae71770cc..c6be17ed9ba270bf2beaab883ff21473de8bb9a9 100644
--- a/src/TNL/Containers/Algorithms/ReductionOperations.h
+++ b/src/TNL/Containers/Algorithms/ReductionOperations.h
@@ -10,7 +10,8 @@
 
 #pragma once
 
-#include <TNL/Constants.h>
+#include <limits>  // std::numeric_limits
+
 #include <TNL/Math.h>
 #include <TNL/Devices/CudaCallable.h>
 
@@ -31,7 +32,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -67,7 +68,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMin< Result >;
 
-   __cuda_callable__ Result initialValue() { return MaxValue< Result >(); };
+   static constexpr Result initialValue() { return std::numeric_limits< Result >::max(); };
 
    template< typename Index >
    __cuda_callable__ void
@@ -103,7 +104,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMax< Result >;
 
-   __cuda_callable__ Result initialValue() { return MinValue< Result>(); };
+   static constexpr Result initialValue() { return std::numeric_limits< Result >::lowest(); };
 
    template< typename Index >
    __cuda_callable__ void
@@ -139,7 +140,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionLogicalAnd< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) true; };
+   static constexpr Result initialValue() { return true; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -176,7 +177,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionLogicalOr< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) false; };
+   static constexpr Result initialValue() { return false; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -212,7 +213,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -234,7 +235,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMin< Result >;
 
-   __cuda_callable__ Result initialValue() { return MaxValue< Result>(); };
+   static constexpr Result initialValue() { return std::numeric_limits< Result >::max(); };
 
    template< typename Index >
    __cuda_callable__ void
@@ -256,7 +257,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMax< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -278,7 +279,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -307,7 +308,7 @@ public:
       this->p = p;
    }
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -337,7 +338,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionLogicalAnd< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) true; };
+   static constexpr Result initialValue() { return true; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -359,7 +360,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionLogicalAnd< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) false; };
+   static constexpr Result initialValue() { return false; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -381,7 +382,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -403,7 +404,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -425,7 +426,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMin< Result >;
 
-   __cuda_callable__ Result initialValue() { return MaxValue< Result>(); };
+   static constexpr Result initialValue() { return std::numeric_limits< Result >::max(); };
 
    template< typename Index >
    __cuda_callable__ void
@@ -447,7 +448,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMax< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return std::numeric_limits< Result >::lowest(); };
 
    template< typename Index >
    __cuda_callable__ void
@@ -469,7 +470,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -491,7 +492,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMin< Result >;
 
-   __cuda_callable__ Result initialValue() { return MaxValue< Result>(); };
+   static constexpr Result initialValue() { return std::numeric_limits< Result >::max(); };
 
    template< typename Index >
    __cuda_callable__ void
@@ -513,7 +514,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionMax< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -535,7 +536,7 @@ public:
    using ResultType = Result;
    using LaterReductionOperation = ParallelReductionSum< Result >;
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
@@ -563,7 +564,7 @@ public:
       this->p = p;
    }
 
-   __cuda_callable__ Result initialValue() { return ( Result ) 0; };
+   static constexpr Result initialValue() { return 0; };
 
    template< typename Index >
    __cuda_callable__ void
diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h
index 121c005d8965ad81462df6321ef88c528f217e4c..9ebfce43f487759df2e55c7fe3224f2a9e95aa30 100644
--- a/src/TNL/Containers/Algorithms/Reduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Reduction_impl.h
@@ -69,12 +69,12 @@ reduce( Operation& operation,
     */
    if( can_reduce_all_on_host && size <= Reduction_minGpuDataSize )
    {
-      DataType1 hostArray1[ Reduction_minGpuDataSize ];
+      typename std::remove_const< DataType1 >::type hostArray1[ Reduction_minGpuDataSize ];
       if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, size ) )
          return false;
       if( deviceInput2 ) {
          using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type;
-         _DT2 hostArray2[ Reduction_minGpuDataSize ];
+         typename std::remove_const< _DT2 >::type hostArray2[ Reduction_minGpuDataSize ];
          if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ) )
             return false;
          return Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2, result );
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cpp
deleted file mode 100644
index e3225d4568d1d209abc314816d4c9b88cfb60e62..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsCuda_impl.cpp  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {    
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Cuda >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Cuda >::getMemoryElement< char        >( const char* data );
-template int         ArrayOperations< Devices::Cuda >::getMemoryElement< int         >( const int* data );
-template long int    ArrayOperations< Devices::Cuda >::getMemoryElement< long int    >( const long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Cuda >::getMemoryElement< float       >( const float* data );
-#endif
-template double      ArrayOperations< Devices::Cuda >::getMemoryElement< double      >( const double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Cuda >::getMemoryElement< long double >( const long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cu
deleted file mode 100644
index 73affc5cc5dbdf488be5f91379bbd5170d534771..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cu
+++ /dev/null
@@ -1,248 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsCuda_impl.cu  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Cuda >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Cuda >::getMemoryElement< char        >( const char* data );
-template int         ArrayOperations< Devices::Cuda >::getMemoryElement< int         >( const int* data );
-template long int    ArrayOperations< Devices::Cuda >::getMemoryElement< long int    >( const long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Cuda >::getMemoryElement< float       >( const float* data );
-#endif
-template double      ArrayOperations< Devices::Cuda >::getMemoryElement< double      >( const double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Cuda >::getMemoryElement< long double >( const long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cpp
deleted file mode 100644
index 2c60d95d832545a92ad9ffb4aba7b5eaf285c4c5..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsHost_impl.cpp  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {    
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Host >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Host >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Host >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Host >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Host >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Host >::getMemoryElement< char        >( char* data );
-template int         ArrayOperations< Devices::Host >::getMemoryElement< int         >( int* data );
-template long int    ArrayOperations< Devices::Host >::getMemoryElement< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Host >::getMemoryElement< float       >( float* data );
-#endif
-template double      ArrayOperations< Devices::Host >::getMemoryElement< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Host >::getMemoryElement< long double >( long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cu
deleted file mode 100644
index 65fdbae2d7731d83c4d2af177e9a6ce070ec5d60..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cu
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsHost_impl.cu  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Host >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Host >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Host >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Host >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Host >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Host >::getMemoryElement< char        >( char* data );
-template int         ArrayOperations< Devices::Host >::getMemoryElement< int         >( int* data );
-template long int    ArrayOperations< Devices::Host >::getMemoryElement< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Host >::getMemoryElement< float       >( float* data );
-#endif
-template double      ArrayOperations< Devices::Host >::getMemoryElement< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Host >::getMemoryElement< long double >( long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt
deleted file mode 100644
index 49409b2ab595a7c47e689f87b469785e32c80fed..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-if( ${WITH_TEMPLATES_INSTANTIATION} )
-
-   SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation )
-   set( common_SOURCES
-        ${CURRENT_DIR}/VectorOperationsHost_impl.cpp
-   )
-   IF( BUILD_CUDA )
-      set( tnl_core_cuda_CUDA__SOURCES
-           ${common_SOURCES}
-           ${CURRENT_DIR}/ArrayOperationsHost_impl.cu
-           ${CURRENT_DIR}/ArrayOperationsCuda_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-abs-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-abs-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-and_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-or_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-equalities_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-scalar-product_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu
-           ${CURRENT_DIR}/cuda-prefix-sum_impl.cu
-           ${CURRENT_DIR}/VectorOperationsCuda_impl.cu
-           PARENT_SCOPE )
-   ELSE()
-      set( common_SOURCES
-           ${common_SOURCES}
-           ${CURRENT_DIR}/ArrayOperationsHost_impl.cpp
-           ${CURRENT_DIR}/ArrayOperationsCuda_impl.cpp
-      )
-   ENDIF()
-
-   set( tnl_core_cuda_SOURCES
-        ${common_SOURCES}
-        ${CURRENT_DIR}/cuda-reduction_impl.cpp
-        PARENT_SCOPE )
-endif()
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cpp
deleted file mode 100644
index 7263405cfdea5e465604044fe33060341029908f..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/***************************************************************************
-                          VectorOperationsCuda_impl.cpp  -  description
-                             -------------------
-    begin                : Dec 10, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/VectorOperations.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-
-/****
- * Abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L2 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L1 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, long int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, long int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, long int >& v, const long double& p );
-#endif
-#endif
-
-
-
-/****
- * Sum
- */
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-
-/****
- * Difference abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
- 
-#endif
- 
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cu
deleted file mode 100644
index 99d715a7516271f2dc882a9def91ccb2f9493b5c..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cu
+++ /dev/null
@@ -1,325 +0,0 @@
-/***************************************************************************
-                          VectorOperationsCuda_impl.cu  -  description
-                             -------------------
-    begin                : Jul 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/VectorOperations.h>
-
-namespace TNL {
-namespace Vectors {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-
-/****
- * Abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L2 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L1 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, long int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, long int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, long int >& v, const long double& p );
-#endif
-#endif
-
-
-
-/****
- * Sum
- */
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-
-/****
- * Difference abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
- 
-#endif
- 
-} // namespace Vectors
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsHost_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsHost_impl.cpp
deleted file mode 100644
index 9803167c6d5cc257fb215840af3f39d1a32e3d23..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsHost_impl.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/***************************************************************************
-                          VectorOperationsHost_impl.cpp  -  description
-                             -------------------
-    begin                : Jul 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/VectorOperations.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs min
- */
-template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * L1 norm
- */
-template int         VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * L2 norm
- */
-template int         VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-
-/****
- * Lp norm
- */
-template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, int >& v, const int& p );
-template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, long int >& v, const int& p );
-template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, long int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, long int >& v, const long double& p );
-#endif
-#endif
-
-
-
-/****
- * Sum
- */
-template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs min
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-prefix-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-prefix-sum_impl.cu
deleted file mode 100644
index 466654f2d0dd169119b4da2305999548123fca92..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-prefix-sum_impl.cu
+++ /dev/null
@@ -1,90 +0,0 @@
-/***************************************************************************
-                          cuda-prefix-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 18, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/cuda-prefix-sum.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const int *deviceInput,
-                             int* deviceOutput,
-                             tnlParallelReductionSum< int, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-
-#ifdef INSTANTIATE_FLOAT
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const float *deviceInput,
-                             float* deviceOutput,
-                             tnlParallelReductionSum< float, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const double *deviceInput,
-                             double* deviceOutput,
-                             tnlParallelReductionSum< double, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const long double *deviceInput,
-                             long double* deviceOutput,
-                             tnlParallelReductionSum< long double, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const int *deviceInput,
-                             int* deviceOutput,
-                             tnlParallelReductionSum< int, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-
-#ifdef INSTANTIATE_FLOAT
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const float *deviceInput,
-                             float* deviceOutput,
-                             tnlParallelReductionSum< float, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const double *deviceInput,
-                             double* deviceOutput,
-                             tnlParallelReductionSum< double, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const long double *deviceInput,
-                             long double* deviceOutput,
-                             tnlParallelReductionSum< long double, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-max_impl.cu
deleted file mode 100644
index f4569b196c1b0f56af4d10690d88c4ab320bb780..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-max_impl.cu
+++ /dev/null
@@ -1,104 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-abs-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Abs max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > >
-                                   ( tnlParallelReductionAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > >
-                                   ( tnlParallelReductionAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > >
-                                   ( tnlParallelReductionAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > >
-                                   ( tnlParallelReductionAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > >
-                                   ( tnlParallelReductionAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result );
-#endif
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > >
-                                   ( tnlParallelReductionAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, long int > :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > >
-                                   ( tnlParallelReductionAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > >
-                                   ( tnlParallelReductionAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > >
-                                   ( tnlParallelReductionAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > >
-                                   ( tnlParallelReductionAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-min_impl.cu
deleted file mode 100644
index 6206cba87118ad2b347c516ca5896f1eb7a0dcb4..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-min_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-abs-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Abs min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > >
-                                   ( tnlParallelReductionAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > >
-                                   ( tnlParallelReductionAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > >
-                                   ( tnlParallelReductionAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > >
-                                   ( tnlParallelReductionAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > >
-                                   ( tnlParallelReductionAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, int> :: ResultType& result );
-#endif
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > >
-                                   ( tnlParallelReductionAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, long int > :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > >
-                                   ( tnlParallelReductionAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > >
-                                   ( tnlParallelReductionAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > >
-                                   ( tnlParallelReductionAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > >
-                                   ( tnlParallelReductionAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-sum_impl.cu
deleted file mode 100644
index 15819cb4b2e111a6304e1e9c3c2a64d6a914c369..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-abs-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Abs sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, int > >
-                                   ( tnlParallelReductionAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, int > >
-                                   ( tnlParallelReductionAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, int > >
-                                   ( tnlParallelReductionAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > >
-                                   ( tnlParallelReductionAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, int > >
-                                   ( tnlParallelReductionAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, int> :: ResultType& result );
-#endif
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int > >
-                                   ( tnlParallelReductionAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, long int > :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, long int > >
-                                   ( tnlParallelReductionAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, long int > >
-                                   ( tnlParallelReductionAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long int > >
-                                   ( tnlParallelReductionAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, long int > >
-                                   ( tnlParallelReductionAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-and_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-and_impl.cu
deleted file mode 100644
index edb30509c62de2803b8bba24d24f3b973aed4a33..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-and_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-and_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Logical AND
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, int > >
-                                   ( tnlParallelReductionLogicalAnd< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, int > >
-                                   ( tnlParallelReductionLogicalAnd< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, int > >
-                                   ( tnlParallelReductionLogicalAnd< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int > >
-                                   ( tnlParallelReductionLogicalAnd< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, int > >
-                                   ( tnlParallelReductionLogicalAnd< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, long int > >
-                                   ( tnlParallelReductionLogicalAnd< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, long int > >
-                                   ( tnlParallelReductionLogicalAnd< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, long int > >
-                                   ( tnlParallelReductionLogicalAnd< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, long int > >
-                                   ( tnlParallelReductionLogicalAnd< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, long int > >
-                                   ( tnlParallelReductionLogicalAnd< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-max_impl.cu
deleted file mode 100644
index d402b1b490660b58df4c76227867944190779559..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-max_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-abs-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff abs max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, int > >
-                                   ( tnlParallelReductionDiffAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, int > >
-                                   ( tnlParallelReductionDiffAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, int > >
-                                   ( tnlParallelReductionDiffAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int > >
-                                   ( tnlParallelReductionDiffAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, int > >
-                                   ( tnlParallelReductionDiffAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-min_impl.cu
deleted file mode 100644
index f954631a6677013319d9e250fe3a6892cf06abcc..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-min_impl.cu
+++ /dev/null
@@ -1,104 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-abs-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-   
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-
-/****
- * Diff abs min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, int > >
-                                   ( tnlParallelReductionDiffAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, int > >
-                                   ( tnlParallelReductionDiffAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, int > >
-                                   ( tnlParallelReductionDiffAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int > >
-                                   ( tnlParallelReductionDiffAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, int > >
-                                   ( tnlParallelReductionDiffAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-sum_impl.cu
deleted file mode 100644
index 3e87fd7c8ec204376bea0db88ffa85282d792390..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-abs-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff abs sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, int > >
-                                   ( tnlParallelReductionDiffAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, int > >
-                                   ( tnlParallelReductionDiffAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, int > >
-                                   ( tnlParallelReductionDiffAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int > >
-                                   ( tnlParallelReductionDiffAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, int > >
-                                   ( tnlParallelReductionDiffAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-l2-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-l2-norm_impl.cu
deleted file mode 100644
index c0f23b3102e45b51c754b771573129926efde8e8..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-l2-norm_impl.cu
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-lp-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff L2 Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, int > >
-                                   ( tnlParallelReductionDiffL2Norm< float, int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int > >
-                                   ( tnlParallelReductionDiffL2Norm< double, int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, int > >
-                                   ( tnlParallelReductionDiffL2Norm< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< char, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< int, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-lp-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-lp-norm_impl.cu
deleted file mode 100644
index a0d4a00262633dafc8b023e927647fd18fb760dd..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-lp-norm_impl.cu
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-lp-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, int > >
-                                   ( tnlParallelReductionDiffLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int > >
-                                   ( tnlParallelReductionDiffLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, int > >
-                                   ( tnlParallelReductionDiffLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< char, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< int, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-max_impl.cu
deleted file mode 100644
index 3eaf7558b545ee30d4bbcf7de1394e2c7e357bb7..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-max_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, int > >
-                                   ( tnlParallelReductionDiffMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, int > >
-                                   ( tnlParallelReductionDiffMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, int > >
-                                   ( tnlParallelReductionDiffMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > >
-                                   ( tnlParallelReductionDiffMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, int > >
-                                   ( tnlParallelReductionDiffMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, long int > >
-                                   ( tnlParallelReductionDiffMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, long int > >
-                                   ( tnlParallelReductionDiffMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, long int > >
-                                   ( tnlParallelReductionDiffMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long int > >
-                                   ( tnlParallelReductionDiffMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, long int > >
-                                   ( tnlParallelReductionDiffMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-min_impl.cu
deleted file mode 100644
index 9e0a1b447f1e54889f72aca3008c41a626b911ca..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-min_impl.cu
+++ /dev/null
@@ -1,104 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-
-/****
- * Diff min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, int > >
-                                   ( tnlParallelReductionDiffMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, int > >
-                                   ( tnlParallelReductionDiffMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, int > >
-                                   ( tnlParallelReductionDiffMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > >
-                                   ( tnlParallelReductionDiffMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, int > >
-                                   ( tnlParallelReductionDiffMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, long int > >
-                                   ( tnlParallelReductionDiffMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, long int > >
-                                   ( tnlParallelReductionDiffMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, long int > >
-                                   ( tnlParallelReductionDiffMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long int > >
-                                   ( tnlParallelReductionDiffMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, long int > >
-                                   ( tnlParallelReductionDiffMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-sum_impl.cu
deleted file mode 100644
index cbf0958556eb1d6c0b50654a81ecdc17f1c47650..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, int > >
-                                   ( tnlParallelReductionDiffSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, int > >
-                                   ( tnlParallelReductionDiffSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, int > >
-                                   ( tnlParallelReductionDiffSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > >
-                                   ( tnlParallelReductionDiffSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, int > >
-                                   ( tnlParallelReductionDiffSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, long int > >
-                                   ( tnlParallelReductionDiffSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, long int > >
-                                   ( tnlParallelReductionDiffSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, long int > >
-                                   ( tnlParallelReductionDiffSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long int > >
-                                   ( tnlParallelReductionDiffSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, long int > >
-                                   ( tnlParallelReductionDiffSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-equalities_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-equalities_impl.cu
deleted file mode 100644
index 7b7c322b7e51e54e9ae3c4826391dff661ec3456..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-equalities_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-equalities_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Equalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, int > >
-                                   ( tnlParallelReductionEqualities< char, int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, int > >
-                                   ( tnlParallelReductionEqualities< int, int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, int > >
-                                   ( tnlParallelReductionEqualities< float, int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int > >
-                                   ( tnlParallelReductionEqualities< double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, int > >
-                                   ( tnlParallelReductionEqualities< long double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, long int > >
-                                   ( tnlParallelReductionEqualities< char, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, long int > >
-                                   ( tnlParallelReductionEqualities< int, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, long int > >
-                                   ( tnlParallelReductionEqualities< float, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, long int > >
-                                   ( tnlParallelReductionEqualities< double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, long int > >
-                                   ( tnlParallelReductionEqualities< long double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-inequalities_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-inequalities_impl.cu
deleted file mode 100644
index 08ca8d8bdc421c345d17671d2ea27829de080b82..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-inequalities_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-inequalities_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Inequalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, int > >
-                                   ( tnlParallelReductionInequalities< char, int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, int > >
-                                   ( tnlParallelReductionInequalities< int, int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, int > >
-                                   ( tnlParallelReductionInequalities< float, int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, int > >
-                                   ( tnlParallelReductionInequalities< double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, int > >
-                                   ( tnlParallelReductionInequalities< long double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, long int > >
-                                   ( tnlParallelReductionInequalities< char, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, long int > >
-                                   ( tnlParallelReductionInequalities< int, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, long int > >
-                                   ( tnlParallelReductionInequalities< float, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, long int > >
-                                   ( tnlParallelReductionInequalities< double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, long int > >
-                                   ( tnlParallelReductionInequalities< long double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-l2-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-l2-norm_impl.cu
deleted file mode 100644
index 5169e1a2adc8eed4422221887f663dc15f5d612d..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-l2-norm_impl.cu
+++ /dev/null
@@ -1,81 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-l2-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * L2 Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, int > >
-                                   ( tnlParallelReductionL2Norm< float, int >& operation,
-                                     const typename tnlParallelReductionL2Norm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > >
-                                   ( tnlParallelReductionL2Norm< double, int>& operation,
-                                     const typename tnlParallelReductionL2Norm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, int > >
-                                   ( tnlParallelReductionL2Norm< long double, int>& operation,
-                                     const typename tnlParallelReductionL2Norm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< int, long int > >
-                                   ( tnlParallelReductionL2Norm< int, long int >& operation,
-                                     const typename tnlParallelReductionL2Norm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< int, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, long int > >
-                                   ( tnlParallelReductionL2Norm< float, long int >& operation,
-                                     const typename tnlParallelReductionL2Norm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long int > >
-                                   ( tnlParallelReductionL2Norm< double, long int>& operation,
-                                     const typename tnlParallelReductionL2Norm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, long int > >
-                                   ( tnlParallelReductionL2Norm< long double, long int>& operation,
-                                     const typename tnlParallelReductionL2Norm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-lp-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-lp-norm_impl.cu
deleted file mode 100644
index 3d5366013a114a5e5dfe2ec317bfc0015485019c..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-lp-norm_impl.cu
+++ /dev/null
@@ -1,81 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-lp-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, int > >
-                                   ( tnlParallelReductionLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > >
-                                   ( tnlParallelReductionLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, int > >
-                                   ( tnlParallelReductionLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< int, long int > >
-                                   ( tnlParallelReductionLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< int, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, long int > >
-                                   ( tnlParallelReductionLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long int > >
-                                   ( tnlParallelReductionLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, long int > >
-                                   ( tnlParallelReductionLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-max_impl.cu
deleted file mode 100644
index a2965136d9816f4ad4ba3b5eaf1d29a9c49b7d82..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-max_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, int > >
-                                   ( tnlParallelReductionMax< char, int >& operation,
-                                     const typename tnlParallelReductionMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, int > >
-                                   ( tnlParallelReductionMax< int, int >& operation,
-                                     const typename tnlParallelReductionMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, int > >
-                                   ( tnlParallelReductionMax< float, int >& operation,
-                                     const typename tnlParallelReductionMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > >
-                                   ( tnlParallelReductionMax< double, int>& operation,
-                                     const typename tnlParallelReductionMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > >
-                                   ( tnlParallelReductionMax< long double, int>& operation,
-                                     const typename tnlParallelReductionMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, long int > >
-                                   ( tnlParallelReductionMax< char, long int >& operation,
-                                     const typename tnlParallelReductionMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, long int > >
-                                   ( tnlParallelReductionMax< int, long int >& operation,
-                                     const typename tnlParallelReductionMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, long int > >
-                                   ( tnlParallelReductionMax< float, long int >& operation,
-                                     const typename tnlParallelReductionMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > >
-                                   ( tnlParallelReductionMax< double, long int>& operation,
-                                     const typename tnlParallelReductionMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, long int > >
-                                   ( tnlParallelReductionMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-min_impl.cu
deleted file mode 100644
index 2434189c4374574eff2a0a51ad5d8f50da2b833b..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-min_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > >
-                                   ( tnlParallelReductionMin< char, int >& operation,
-                                     const typename tnlParallelReductionMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, int > >
-                                   ( tnlParallelReductionMin< int, int >& operation,
-                                     const typename tnlParallelReductionMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, int > >
-                                   ( tnlParallelReductionMin< float, int >& operation,
-                                     const typename tnlParallelReductionMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > >
-                                   ( tnlParallelReductionMin< double, int>& operation,
-                                     const typename tnlParallelReductionMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > >
-                                   ( tnlParallelReductionMin< long double, int>& operation,
-                                     const typename tnlParallelReductionMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, long int > >
-                                   ( tnlParallelReductionMin< char, long int >& operation,
-                                     const typename tnlParallelReductionMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, long int > >
-                                   ( tnlParallelReductionMin< int, long int >& operation,
-                                     const typename tnlParallelReductionMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, long int > >
-                                   ( tnlParallelReductionMin< float, long int >& operation,
-                                     const typename tnlParallelReductionMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > >
-                                   ( tnlParallelReductionMin< double, long int>& operation,
-                                     const typename tnlParallelReductionMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, long int > >
-                                   ( tnlParallelReductionMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-or_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-or_impl.cu
deleted file mode 100644
index 6e2c9849ec896138b4c8cf106a8922cd2437c8a9..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-or_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-or_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Logical OR
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, int > >
-                                   ( tnlParallelReductionLogicalOr< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, int > >
-                                   ( tnlParallelReductionLogicalOr< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, int > >
-                                   ( tnlParallelReductionLogicalOr< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int > >
-                                   ( tnlParallelReductionLogicalOr< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, int > >
-                                   ( tnlParallelReductionLogicalOr< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, long int > >
-                                   ( tnlParallelReductionLogicalOr< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, long int > >
-                                   ( tnlParallelReductionLogicalOr< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, long int > >
-                                   ( tnlParallelReductionLogicalOr< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long int > >
-                                   ( tnlParallelReductionLogicalOr< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, long int > >
-                                   ( tnlParallelReductionLogicalOr< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-scalar-product_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-scalar-product_impl.cu
deleted file mode 100644
index eabb3aff6c912faa88b45727c0082dac0538afea..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-scalar-product_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-scalar-product_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * ScalarProduct
- */
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, int > >
-                                   ( tnlParallelReductionScalarProduct< char, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, int > >
-                                   ( tnlParallelReductionScalarProduct< int, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, int > >
-                                   ( tnlParallelReductionScalarProduct< float, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, int > >
-                                   ( tnlParallelReductionScalarProduct< double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, int > >
-                                   ( tnlParallelReductionScalarProduct< long double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, long int > >
-                                   ( tnlParallelReductionScalarProduct< char, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, long int > >
-                                   ( tnlParallelReductionScalarProduct< int, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, long int > >
-                                   ( tnlParallelReductionScalarProduct< float, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, long int > >
-                                   ( tnlParallelReductionScalarProduct< double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, long int > >
-                                   ( tnlParallelReductionScalarProduct< long double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-sum_impl.cu
deleted file mode 100644
index 79d9263ab4922d91caf08493653a8b1effdc1cf6..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > >
-                                   ( tnlParallelReductionSum< char, int >& operation,
-                                     const typename tnlParallelReductionSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, int > >
-                                   ( tnlParallelReductionSum< int, int >& operation,
-                                     const typename tnlParallelReductionSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, int > >
-                                   ( tnlParallelReductionSum< float, int >& operation,
-                                     const typename tnlParallelReductionSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > >
-                                   ( tnlParallelReductionSum< double, int>& operation,
-                                     const typename tnlParallelReductionSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > >
-                                   ( tnlParallelReductionSum< long double, int>& operation,
-                                     const typename tnlParallelReductionSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, long int > >
-                                   ( tnlParallelReductionSum< char, long int >& operation,
-                                     const typename tnlParallelReductionSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, long int > >
-                                   ( tnlParallelReductionSum< int, long int >& operation,
-                                     const typename tnlParallelReductionSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, long int > >
-                                   ( tnlParallelReductionSum< float, long int >& operation,
-                                     const typename tnlParallelReductionSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > >
-                                   ( tnlParallelReductionSum< double, long int>& operation,
-                                     const typename tnlParallelReductionSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, long int > >
-                                   ( tnlParallelReductionSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction_impl.cpp
deleted file mode 100644
index ce76fd397eea1d50f6fb873038f0a1223efd72e4..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction_impl.cpp
+++ /dev/null
@@ -1,1505 +0,0 @@
-/***************************************************************************
-                          cuda-reduction_impl.cpp  -  description
-                             -------------------
-    begin                : Mar 24, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > >
-                                   ( const tnlParallelReductionSum< char, int >& operation,
-                                     const typename tnlParallelReductionSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, int > >
-                                   ( const tnlParallelReductionSum< int, int >& operation,
-                                     const typename tnlParallelReductionSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, int > >
-                                   ( const tnlParallelReductionSum< float, int >& operation,
-                                     const typename tnlParallelReductionSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > >
-                                   ( const tnlParallelReductionSum< double, int>& operation,
-                                     const typename tnlParallelReductionSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > >
-                                   ( const tnlParallelReductionSum< long double, int>& operation,
-                                     const typename tnlParallelReductionSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, long int > >
-                                   ( const tnlParallelReductionSum< char, long int >& operation,
-                                     const typename tnlParallelReductionSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, long int > >
-                                   ( const tnlParallelReductionSum< int, long int >& operation,
-                                     const typename tnlParallelReductionSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, long int > >
-                                   ( const tnlParallelReductionSum< float, long int >& operation,
-                                     const typename tnlParallelReductionSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > >
-                                   ( const tnlParallelReductionSum< double, long int>& operation,
-                                     const typename tnlParallelReductionSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, long int > >
-                                   ( const tnlParallelReductionSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > >
-                                   ( const tnlParallelReductionMin< char, int >& operation,
-                                     const typename tnlParallelReductionMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, int > >
-                                   ( const tnlParallelReductionMin< int, int >& operation,
-                                     const typename tnlParallelReductionMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, int > >
-                                   ( const tnlParallelReductionMin< float, int >& operation,
-                                     const typename tnlParallelReductionMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > >
-                                   ( const tnlParallelReductionMin< double, int>& operation,
-                                     const typename tnlParallelReductionMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > >
-                                   ( const tnlParallelReductionMin< long double, int>& operation,
-                                     const typename tnlParallelReductionMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, long int > >
-                                   ( const tnlParallelReductionMin< char, long int >& operation,
-                                     const typename tnlParallelReductionMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, long int > >
-                                   ( const tnlParallelReductionMin< int, long int >& operation,
-                                     const typename tnlParallelReductionMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, long int > >
-                                   ( const tnlParallelReductionMin< float, long int >& operation,
-                                     const typename tnlParallelReductionMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > >
-                                   ( const tnlParallelReductionMin< double, long int>& operation,
-                                     const typename tnlParallelReductionMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, long int > >
-                                   ( const tnlParallelReductionMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, int > >
-                                   ( const tnlParallelReductionMax< char, int >& operation,
-                                     const typename tnlParallelReductionMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, int > >
-                                   ( const tnlParallelReductionMax< int, int >& operation,
-                                     const typename tnlParallelReductionMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, int > >
-                                   ( const tnlParallelReductionMax< float, int >& operation,
-                                     const typename tnlParallelReductionMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > >
-                                   ( const tnlParallelReductionMax< double, int>& operation,
-                                     const typename tnlParallelReductionMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > >
-                                   ( const tnlParallelReductionMax< long double, int>& operation,
-                                     const typename tnlParallelReductionMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, long int > >
-                                   ( const tnlParallelReductionMax< char, long int >& operation,
-                                     const typename tnlParallelReductionMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, long int > >
-                                   ( const tnlParallelReductionMax< int, long int >& operation,
-                                     const typename tnlParallelReductionMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, long int > >
-                                   ( const tnlParallelReductionMax< float, long int >& operation,
-                                     const typename tnlParallelReductionMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > >
-                                   ( const tnlParallelReductionMax< double, long int>& operation,
-                                     const typename tnlParallelReductionMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, long int > >
-                                   ( const tnlParallelReductionMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Abs sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, int > >
-                                   ( const tnlParallelReductionAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, int > >
-                                   ( const tnlParallelReductionAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, int > >
-                                   ( const tnlParallelReductionAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > >
-                                   ( const tnlParallelReductionAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, int > >
-                                   ( const tnlParallelReductionAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int > >
-                                   ( const tnlParallelReductionAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, long int > >
-                                   ( const tnlParallelReductionAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, long int > >
-                                   ( const tnlParallelReductionAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long int > >
-                                   ( const tnlParallelReductionAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, long int > >
-                                   ( const tnlParallelReductionAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Abs min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > >
-                                   ( const tnlParallelReductionAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > >
-                                   ( const tnlParallelReductionAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > >
-                                   ( const tnlParallelReductionAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > >
-                                   ( const tnlParallelReductionAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > >
-                                   ( const tnlParallelReductionAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > >
-                                   ( const tnlParallelReductionAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > >
-                                   ( const tnlParallelReductionAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > >
-                                   ( const tnlParallelReductionAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > >
-                                   ( const tnlParallelReductionAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > >
-                                   ( const tnlParallelReductionAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-/****
- * Abs max
- */
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > >
-                                   ( const tnlParallelReductionAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > >
-                                   ( const tnlParallelReductionAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > >
-                                   ( const tnlParallelReductionAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > >
-                                   ( const tnlParallelReductionAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > >
-                                   ( const tnlParallelReductionAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > >
-                                   ( const tnlParallelReductionAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > >
-                                   ( const tnlParallelReductionAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > >
-                                   ( const tnlParallelReductionAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > >
-                                   ( const tnlParallelReductionAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > >
-                                   ( const tnlParallelReductionAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Logical AND
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, int > >
-                                   ( const tnlParallelReductionLogicalAnd< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, int > >
-                                   ( const tnlParallelReductionLogicalAnd< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, int > >
-                                   ( const tnlParallelReductionLogicalAnd< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int > >
-                                   ( const tnlParallelReductionLogicalAnd< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, int > >
-                                   ( const tnlParallelReductionLogicalAnd< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Logical OR
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, int > >
-                                   ( const tnlParallelReductionLogicalOr< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, int > >
-                                   ( const tnlParallelReductionLogicalOr< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, int > >
-                                   ( const tnlParallelReductionLogicalOr< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int > >
-                                   ( const tnlParallelReductionLogicalOr< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, int > >
-                                   ( const tnlParallelReductionLogicalOr< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, long int > >
-                                   ( const tnlParallelReductionLogicalOr< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, long int > >
-                                   ( const tnlParallelReductionLogicalOr< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, long int > >
-                                   ( const tnlParallelReductionLogicalOr< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long int > >
-                                   ( const tnlParallelReductionLogicalOr< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, long int > >
-                                   ( const tnlParallelReductionLogicalOr< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, int > >
-                                   ( const tnlParallelReductionLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > >
-                                   ( const tnlParallelReductionLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, int > >
-                                   ( const tnlParallelReductionLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< char, long int > >
-                                   ( const tnlParallelReductionLpNorm< char, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< int, long int > >
-                                   ( const tnlParallelReductionLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, long int > >
-                                   ( const tnlParallelReductionLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long int > >
-                                   ( const tnlParallelReductionLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, long int > >
-                                   ( const tnlParallelReductionLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Equalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, int > >
-                                   ( const tnlParallelReductionEqualities< char, int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, int > >
-                                   ( const tnlParallelReductionEqualities< int, int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, int > >
-                                   ( const tnlParallelReductionEqualities< float, int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int > >
-                                   ( const tnlParallelReductionEqualities< double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, int > >
-                                   ( const tnlParallelReductionEqualities< long double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, long int > >
-                                   ( const tnlParallelReductionEqualities< char, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, long int > >
-                                   ( const tnlParallelReductionEqualities< int, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, long int > >
-                                   ( const tnlParallelReductionEqualities< float, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, long int > >
-                                   ( const tnlParallelReductionEqualities< double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, long int > >
-                                   ( const tnlParallelReductionEqualities< long double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Inequalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, int > >
-                                   ( const tnlParallelReductionInequalities< char, int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, int > >
-                                   ( const tnlParallelReductionInequalities< int, int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, int > >
-                                   ( const tnlParallelReductionInequalities< float, int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, int > >
-                                   ( const tnlParallelReductionInequalities< double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, int > >
-                                   ( const tnlParallelReductionInequalities< long double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, long int > >
-                                   ( const tnlParallelReductionInequalities< char, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, long int > >
-                                   ( const tnlParallelReductionInequalities< int, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, long int > >
-                                   ( const tnlParallelReductionInequalities< float, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, long int > >
-                                   ( const tnlParallelReductionInequalities< double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, long int > >
-                                   ( const tnlParallelReductionInequalities< long double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * ScalarProduct
- */
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, int > >
-                                   ( const tnlParallelReductionScalarProduct< char, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, int > >
-                                   ( const tnlParallelReductionScalarProduct< int, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, int > >
-                                   ( const tnlParallelReductionScalarProduct< float, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, int > >
-                                   ( const tnlParallelReductionScalarProduct< double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, int > >
-                                   ( const tnlParallelReductionScalarProduct< long double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, long int > >
-                                   ( const tnlParallelReductionScalarProduct< char, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, long int > >
-                                   ( const tnlParallelReductionScalarProduct< int, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, long int > >
-                                   ( const tnlParallelReductionScalarProduct< float, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, long int > >
-                                   ( const tnlParallelReductionScalarProduct< double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, long int > >
-                                   ( const tnlParallelReductionScalarProduct< long double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, int > >
-                                   ( const tnlParallelReductionDiffSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, int > >
-                                   ( const tnlParallelReductionDiffSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, int > >
-                                   ( const tnlParallelReductionDiffSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > >
-                                   ( const tnlParallelReductionDiffSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, int > >
-                                   ( const tnlParallelReductionDiffSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, long int > >
-                                   ( const tnlParallelReductionDiffSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, long int > >
-                                   ( const tnlParallelReductionDiffSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, long int > >
-                                   ( const tnlParallelReductionDiffSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long int > >
-                                   ( const tnlParallelReductionDiffSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, long int > >
-                                   ( const tnlParallelReductionDiffSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, int > >
-                                   ( const tnlParallelReductionDiffMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, int > >
-                                   ( const tnlParallelReductionDiffMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, int > >
-                                   ( const tnlParallelReductionDiffMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > >
-                                   ( const tnlParallelReductionDiffMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, int > >
-                                   ( const tnlParallelReductionDiffMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, long int > >
-                                   ( const tnlParallelReductionDiffMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, long int > >
-                                   ( const tnlParallelReductionDiffMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, long int > >
-                                   ( const tnlParallelReductionDiffMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long int > >
-                                   ( const tnlParallelReductionDiffMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, long int > >
-                                   ( const tnlParallelReductionDiffMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff max
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, int > >
-                                   ( const tnlParallelReductionDiffMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, int > >
-                                   ( const tnlParallelReductionDiffMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, int > >
-                                   ( const tnlParallelReductionDiffMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > >
-                                   ( const tnlParallelReductionDiffMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, int > >
-                                   ( const tnlParallelReductionDiffMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, long int > >
-                                   ( const tnlParallelReductionDiffMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, long int > >
-                                   ( const tnlParallelReductionDiffMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, long int > >
-                                   ( const tnlParallelReductionDiffMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long int > >
-                                   ( const tnlParallelReductionDiffMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, long int > >
-                                   ( const tnlParallelReductionDiffMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff abs sum
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff abs min
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff abs max
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, int > >
-                                   ( const tnlParallelReductionDiffLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int > >
-                                   ( const tnlParallelReductionDiffLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, int > >
-                                   ( const tnlParallelReductionDiffLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< char, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< int, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
-
-
diff --git a/src/TNL/Containers/Algorithms/VectorOperations.h b/src/TNL/Containers/Algorithms/VectorOperations.h
index bdec1a916ecc2cec578c5b6b4b0be5ad9db78901..3b0d4ae4bd8bb9cb8aee22f73236720d32a66857 100644
--- a/src/TNL/Containers/Algorithms/VectorOperations.h
+++ b/src/TNL/Containers/Algorithms/VectorOperations.h
@@ -8,14 +8,14 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#pragma once 
+#pragma once
 
 #include <TNL/Containers/Algorithms/Reduction.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
 namespace TNL {
-namespace Containers {   
+namespace Containers {
 namespace Algorithms {
 
 template< typename Device >
@@ -24,8 +24,7 @@ class VectorOperations{};
 template<>
 class VectorOperations< Devices::Host >
 {
-   public:
-
+public:
    template< typename Vector >
    static void addElement( Vector& v,
                            const typename Vector::IndexType i,
@@ -37,24 +36,24 @@ class VectorOperations< Devices::Host >
                            const typename Vector::RealType& value,
                            const typename Vector::RealType& thisElementMultiplicator );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorMax( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorMax( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorMin( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorMin( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorAbsMax( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorAbsMax( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorAbsMin( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorAbsMin( const Vector& v );
 
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorL1Norm( const Vector& v );
- 
+
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorL2Norm( const Vector& v );
- 
+
    template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ >
    static ResultType getVectorLpNorm( const Vector& v,
                                       const Real_ p );
@@ -62,54 +61,43 @@ class VectorOperations< Devices::Host >
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorSum( const Vector& v );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceMax( const Vector1& v1,
-                                                             const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceMin( const Vector1& v1,
-                                                             const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceAbsMax( const Vector1& v1,
-                                                                const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
-                                                                const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceL1Norm( const Vector1& v1,
-                                                const Vector2& v2 );
+   static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceL2Norm( const Vector1& v1,
-                                                const Vector2& v2 );
- 
+   static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );
+
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ >
-   static ResultType getVectorDifferenceLpNorm( const Vector1& v1,
-                                                const Vector2& v2,
-                                                const Real_ p );
+   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceSum( const Vector1& v1,
-                                             const Vector2& v2 );
- 
- 
+   static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );
+
    template< typename Vector >
    static void vectorScalarMultiplication( Vector& v,
                                            const typename Vector::RealType& alpha );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getScalarProduct( const Vector1& v1,
-                                                       const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2 >
    static void addVector( Vector1& y,
-                          const Vector2& v,
-                          const typename Vector2::RealType& multiplicator,
+                          const Vector2& x,
+                          const typename Vector2::RealType& alpha,
                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
- 
+
    template< typename Vector1, typename Vector2, typename Vector3 >
    static void addVectors( Vector1& v,
                            const Vector2& v1,
@@ -132,8 +120,7 @@ class VectorOperations< Devices::Host >
 template<>
 class VectorOperations< Devices::Cuda >
 {
-   public:
-
+public:
    template< typename Vector >
    static void addElement( Vector& v,
                            const typename Vector::IndexType i,
@@ -145,24 +132,24 @@ class VectorOperations< Devices::Cuda >
                            const typename Vector::RealType& value,
                            const typename Vector::RealType& thisElementMultiplicator );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorMax( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorMax( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorMin( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorMin( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorAbsMax( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorAbsMax( const Vector& v );
+
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorAbsMin( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorAbsMin( const Vector& v );
- 
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorL1Norm( const Vector& v );
- 
+
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorL2Norm( const Vector& v );
- 
+
    template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ >
    static ResultType getVectorLpNorm( const Vector& v,
                                       const Real_ p );
@@ -170,54 +157,43 @@ class VectorOperations< Devices::Cuda >
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorSum( const Vector& v );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceMax( const Vector1& v1,
-                                                             const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceMin( const Vector1& v1,
-                                                             const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceAbsMax( const Vector1& v1,
-                                                                const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
-                                                                const Vector2& v2 );
- 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceL1Norm( const Vector1& v1,
-                                                const Vector2& v2 );
+   static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceL2Norm( const Vector1& v1,
-                                                const Vector2& v2 );
- 
+   static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 );
+
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );
+
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ >
-   static ResultType getVectorDifferenceLpNorm( const Vector1& v1,
-                                                const Vector2& v2,
-                                                const Real_ p );
+   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceSum( const Vector1& v1,
-                                             const Vector2& v2 );
- 
- 
+   static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );
+
    template< typename Vector >
    static void vectorScalarMultiplication( Vector& v,
                                            const typename Vector::RealType& alpha );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getScalarProduct( const Vector1& v1,
-                                                       const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2 >
    static void addVector( Vector1& y,
                           const Vector2& x,
                           const typename Vector2::RealType& alpha,
                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
- 
+
    template< typename Vector1, typename Vector2, typename Vector3 >
    static void addVectors( Vector1& v,
                            const Vector2& v1,
@@ -225,7 +201,7 @@ class VectorOperations< Devices::Cuda >
                            const Vector3& v2,
                            const typename Vector3::RealType& multiplicator2,
                            const typename Vector1::RealType& thisMultiplicator = 1.0 );
- 
+
    template< typename Vector >
    static void computePrefixSum( Vector& v,
                                  const typename Vector::IndexType begin,
@@ -241,8 +217,7 @@ class VectorOperations< Devices::Cuda >
 template<>
 class VectorOperations< Devices::MIC >
 {
-   public:
-
+public:
    template< typename Vector >
    static void addElement( Vector& v,
                            const typename Vector::IndexType i,
@@ -254,24 +229,24 @@ class VectorOperations< Devices::MIC >
                            const typename Vector::RealType& value,
                            const typename Vector::RealType& thisElementMultiplicator );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorMax( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorMax( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorMin( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorMin( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorAbsMax( const Vector& v );
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorAbsMax( const Vector& v );
+
+   template< typename Vector, typename ResultType = typename Vector::RealType >
+   static ResultType getVectorAbsMin( const Vector& v );
 
-   template< typename Vector >
-   static typename Vector::RealType getVectorAbsMin( const Vector& v );
-   
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorL1Norm( const Vector& v );
- 
+
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorL2Norm( const Vector& v );
- 
+
    template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ >
    static ResultType getVectorLpNorm( const Vector& v,
                                       const Real_ p );
@@ -279,54 +254,43 @@ class VectorOperations< Devices::MIC >
    template< typename Vector, typename ResultType = typename Vector::RealType >
    static ResultType getVectorSum( const Vector& v );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceMax( const Vector1& v1,
-                                                             const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceMin( const Vector1& v1,
-                                                               const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceAbsMax( const Vector1& v1,
-                                                                  const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
-                                                                const Vector2& v2 );
-  
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceL1Norm( const Vector1& v1,
-                                                const Vector2& v2 );
+   static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceL2Norm( const Vector1& v1,
-                                                const Vector2& v2 );
- 
+   static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 );
+
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );
+
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ >
-   static ResultType getVectorDifferenceLpNorm( const Vector1& v1,
-                                                const Vector2& v2,
-                                                const Real_ p );
+   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p );
 
    template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
-   static ResultType getVectorDifferenceSum( const Vector1& v1,
-                                             const Vector2& v2 );
- 
-   
+   static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );
+
    template< typename Vector >
    static void vectorScalarMultiplication( Vector& v,
                                            const typename Vector::RealType& alpha );
 
-   template< typename Vector1, typename Vector2 >
-   static typename Vector1::RealType getScalarProduct( const Vector1& v1,
-                                                         const Vector2& v2 );
+   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
+   static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );
 
    template< typename Vector1, typename Vector2 >
    static void addVector( Vector1& y,
                           const Vector2& x,
                           const typename Vector2::RealType& alpha,
                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
-   
+
    template< typename Vector1, typename Vector2, typename Vector3 >
    static void addVectors( Vector1& v,
                            const Vector2& v1,
@@ -334,7 +298,6 @@ class VectorOperations< Devices::MIC >
                            const Vector3& v2,
                            const typename Vector3::RealType& multiplicator2,
                            const typename Vector1::RealType& thisMultiplicator = 1.0 );
-   
 
    template< typename Vector >
    static void computePrefixSum( Vector& v,
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
index 64846317c486f6f852e2abeeef79c184b7fb0b76..b1bc4dec94817845791641d9a59b9d5bb43ded35 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
@@ -16,7 +16,7 @@
 #include <TNL/Containers/Algorithms/cuda-prefix-sum.h>
 
 namespace TNL {
-namespace Containers {   
+namespace Containers {
 namespace Algorithms {
 
 template< typename Vector >
@@ -40,78 +40,78 @@ addElement( Vector& v,
    v[ i ] = thisElementMultiplicator * v[ i ] + value;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorMax( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
+   typedef typename Vector::RealType RealType;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionMax< Real > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionMax< RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v.getSize(),
                                        v.getData(),
-                                       ( Real* ) 0,
+                                       ( RealType* ) 0,
                                        result );
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorMin( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
+   typedef typename Vector::RealType RealType;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionMin< Real > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionMin< RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v.getSize(),
                                        v.getData(),
-                                       ( Real* ) 0,
+                                       ( RealType* ) 0,
                                        result );
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorAbsMax( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
+   typedef typename Vector::RealType RealType;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionAbsMax< Real > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionAbsMax< RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v.getSize(),
                                        v.getData(),
-                                       ( Real* ) 0,
+                                       ( RealType* ) 0,
                                        result );
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorAbsMin( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
+   typedef typename Vector::RealType RealType;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionAbsMin< Real > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionAbsMin< RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v.getSize(),
                                        v.getData(),
-                                       ( Real* ) 0,
+                                       ( RealType* ) 0,
                                        result );
    return result;
 }
@@ -164,7 +164,7 @@ getVectorLpNorm( const Vector& v,
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );
- 
+
    if( p == 1 )
       return getVectorL1Norm< Vector, ResultType >( v );
    if( p == 2 )
@@ -199,19 +199,17 @@ getVectorSum( const Vector& v )
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorDifferenceMax( const Vector1& v1,
                         const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
-
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionDiffMax< typename Vector1::RealType, typename Vector2::RealType > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionDiffMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v1.getSize(),
                                        v1.getData(),
@@ -220,19 +218,17 @@ getVectorDifferenceMax( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorDifferenceMin( const Vector1& v1,
                         const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
-
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionDiffMin< typename Vector1::RealType, typename Vector2::RealType > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionDiffMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v1.getSize(),
                                        v1.getData(),
@@ -242,19 +238,17 @@ getVectorDifferenceMin( const Vector1& v1,
 }
 
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorDifferenceAbsMax( const Vector1& v1,
                            const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
-
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionDiffAbsMax< typename Vector1::RealType, typename Vector2::RealType > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionDiffAbsMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v1.getSize(),
                                        v1.getData(),
@@ -263,19 +257,17 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getVectorDifferenceAbsMin( const Vector1& v1,
                            const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
-
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionDiffAbsMin< typename Vector1::RealType, typename Vector2::RealType > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionDiffAbsMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v1.getSize(),
                                        v1.getData(),
@@ -290,8 +282,6 @@ VectorOperations< Devices::Cuda >::
 getVectorDifferenceL1Norm( const Vector1& v1,
                            const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
-
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
@@ -397,7 +387,7 @@ vectorScalarMultiplication( Vector& v,
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
 #ifdef HAVE_CUDA
-   typedef typename Vector::IndexType Index;   
+   typedef typename Vector::IndexType Index;
    dim3 blockSize( 0 ), gridSize( 0 );
    const Index& size = v.getSize();
    blockSize.x = 256;
@@ -413,19 +403,17 @@ vectorScalarMultiplication( Vector& v,
 }
 
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Cuda >::
 getScalarProduct( const Vector1& v1,
                   const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
-
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result( 0 );
-   Algorithms::ParallelReductionScalarProduct< typename Vector1::RealType, typename Vector2::RealType > operation;
+   ResultType result( 0 );
+   Algorithms::ParallelReductionScalarProduct< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation;
    Reduction< Devices::Cuda >::reduce( operation,
                                        v1.getSize(),
                                        v1.getData(),
@@ -474,7 +462,7 @@ addVector( Vector1& y,
 
 #ifdef HAVE_CUDA
    typedef typename Vector1::IndexType Index;
-   
+
    dim3 blockSize( 0 ), gridSize( 0 );
 
    const Index& size = x.getSize();
@@ -542,7 +530,7 @@ addVectors( Vector1& v,
    TNL_ASSERT_EQ( v.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
 #ifdef HAVE_CUDA
-   typedef typename Vector1::IndexType Index;   
+   typedef typename Vector1::IndexType Index;
    dim3 blockSize( 0 ), gridSize( 0 );
 
    const Index& size = v.getSize();
@@ -614,267 +602,3 @@ computeExclusivePrefixSum( Vector& v,
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#include <TNL/Containers/Vector.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-/****
- * Max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, long int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, long int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, long int >& v, const long double& p );
-#endif
-#endif
-
-/****
- * Sum
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
-
-#endif
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
index 4a3b92ec4f1225e8ce3129c9ca5afe29611f95de..ef938886ed93821d1cd071402c2ac85b66e22c42 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Algorithms/VectorOperations.h>
 
 namespace TNL {
-namespace Containers {   
+namespace Containers {
 namespace Algorithms {
 
 static const int OpenMPVectorOperationsThreshold = 512; // TODO: check this threshold
@@ -41,17 +41,16 @@ addElement( Vector& v,
    v[ i ] = thisElementMultiplicator * v[ i ] + value;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorMax( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result = v.getElement( 0 );
+   ResultType result = v.getElement( 0 );
    const Index n = v.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
@@ -61,17 +60,16 @@ getVectorMax( const Vector& v )
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorMin( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result = v.getElement( 0 );
+   ResultType result = v.getElement( 0 );
    const Index n = v.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
@@ -81,44 +79,42 @@ getVectorMin( const Vector& v )
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorAbsMax( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result = TNL::abs( v.getElement( 0 ) );
+   ResultType result = TNL::abs( v.getElement( 0 ) );
    const Index n = v.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif
    for( Index i = 1; i < n; i ++ )
-      result = max( result, ( Real ) TNL::abs( v.getElement( i ) ) );
+      result = max( result, (ResultType) TNL::abs( v.getElement( i ) ) );
    return result;
 }
 
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorAbsMin( const Vector& v )
 {
-   typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
 
    TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
 
-   Real result = TNL::abs( v.getElement( 0 ) );
+   ResultType result = TNL::abs( v.getElement( 0 ) );
    const Index n = v.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif
    for( Index i = 1; i < n; i ++ )
-      result = min( result, ( Real ) TNL::abs( v.getElement( i ) ) );
+      result = min( result, (ResultType) TNL::abs( v.getElement( i ) ) );
    return result;
 }
 
@@ -248,19 +244,18 @@ getVectorSum( const Vector& v )
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorDifferenceMax( const Vector1& v1,
                         const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
 
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result = v1.getElement( 0 ) - v2.getElement( 0 );
+   ResultType result = v1.getElement( 0 ) - v2.getElement( 0 );
    const Index n = v1.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
@@ -270,19 +265,18 @@ getVectorDifferenceMax( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorDifferenceMin( const Vector1& v1,
                         const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
 
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result = v1.getElement( 0 ) - v2.getElement( 0 );
+   ResultType result = v1.getElement( 0 ) - v2.getElement( 0 );
    const Index n = v1.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
@@ -292,47 +286,45 @@ getVectorDifferenceMin( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorDifferenceAbsMax( const Vector1& v1,
                            const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
 
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result = TNL::abs( v1.getElement( 0 ) - v2.getElement( 0 ) );
+   ResultType result = TNL::abs( v1.getElement( 0 ) - v2.getElement( 0 ) );
    const Index n = v1.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif
    for( Index i = 1; i < n; i ++ )
-      result =  max( result, ( Real ) TNL::abs( v1.getElement( i ) - v2.getElement( i ) ) );
+      result =  max( result, (ResultType) TNL::abs( v1.getElement( i ) - v2.getElement( i ) ) );
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getVectorDifferenceAbsMin( const Vector1& v1,
                            const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
 
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
    TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
 
-   Real result = TNL::abs( v1[ 0 ] - v2[ 0 ] );
+   ResultType result = TNL::abs( v1[ 0 ] - v2[ 0 ] );
    const Index n = v1.getSize();
 #if defined( HAVE_OPENMP ) && _OPENMP >= 201107  // OpenMP 3.1 added support for min/max reduction operations
 #pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif
    for( Index i = 1; i < n; i ++ )
-      result =  min( result, ( Real ) TNL::abs( v1[ i ] - v2[ i ] ) );
+      result =  min( result, (ResultType) TNL::abs( v1[ i ] - v2[ i ] ) );
    return result;
 }
 
@@ -363,7 +355,6 @@ VectorOperations< Devices::Host >::
 getVectorDifferenceL2Norm( const Vector1& v1,
                            const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
 
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
@@ -376,7 +367,7 @@ getVectorDifferenceL2Norm( const Vector1& v1,
 #endif
    for( Index i = 0; i < n; i ++ )
    {
-      Real aux = TNL::abs( v1[ i ] - v2[ i ] );
+      ResultType aux = TNL::abs( v1[ i ] - v2[ i ] );
       result += aux * aux;
    }
    return std::sqrt( result );
@@ -452,13 +443,12 @@ vectorScalarMultiplication( Vector& v,
 }
 
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::Host >::
 getScalarProduct( const Vector1& v1,
                   const Vector2& v2 )
 {
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
 
    TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
@@ -467,13 +457,14 @@ getScalarProduct( const Vector1& v1,
 
 #ifdef OPTIMIZED_VECTOR_HOST_OPERATIONS
 #ifdef __GNUC__
+   typedef typename Vector1::RealType Real;
    // We need to get the address of the first element to avoid
    // bounds checking in TNL::Array::operator[]
    const Real* V1 = v1.getData();
    const Real* V2 = v2.getData();
 #endif
 
-   Real dot1 = 0.0, dot2 = 0.0, dot3 = 0.0, dot4 = 0.0;
+   ResultType dot1 = 0.0, dot2 = 0.0, dot3 = 0.0, dot4 = 0.0;
    Index i = 0;
    const Index unroll_limit = n - n % 4;
 #ifdef HAVE_OPENMP
@@ -504,7 +495,7 @@ getScalarProduct( const Vector1& v1,
 
 #else // OPTIMIZED_VECTOR_HOST_OPERATIONS
 
-   Real result( 0.0 );
+   ResultType result( 0.0 );
 #ifdef HAVE_OPENMP
    #pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif
@@ -651,268 +642,3 @@ computeExclusivePrefixSum( Vector& v,
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#include <TNL/Containers/Vector.h>
-
-namespace TNL {
-namespace Containers {   
-namespace Algorithms {
-
-/****
- * Max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-extern template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, long int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, long int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, long int >& v, const long double& p );
-#endif
-#endif
-
-/****
- * Sum
- */
-extern template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
-
-#endif
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h
index 917b92368ec0b370921b9b2e6be6b7ffcbff79ec..4a00ce3154eb27a2369ee135d7460a6c48460f93 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h
@@ -46,13 +46,13 @@ addElement( Vector& v,
    v.setElement(i,thisElementMultiplicator*v.getElemet(i)+value);
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorMax( const Vector& v )
 {
    //tady je možnost paralelizace
-   typename Vector::RealType result;
+   ResultType result;
    typename Vector::IndexType size=v.getSize();
    Devices::MICHider<const typename Vector::RealType > vct;
    vct.pointer=v.getData();
@@ -69,13 +69,13 @@ getVectorMax( const Vector& v )
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorMin( const Vector& v )
 {
    //tady je možnost paralelizace
-   typename Vector::RealType result;
+   ResultType result;
    typename Vector::IndexType size=v.getSize();
    Devices::MICHider<const typename Vector::RealType > vct;
    vct.pointer=v.getData();
@@ -92,13 +92,13 @@ getVectorMin( const Vector& v )
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorAbsMax( const Vector& v )
 {
    //tady je možnost paralelizace
-   typename Vector::RealType result;
+   ResultType result;
    typename Vector::IndexType size=v.getSize();
    Devices::MICHider<const typename Vector::RealType > vct;
    vct.pointer=v.getData();
@@ -115,13 +115,13 @@ getVectorAbsMax( const Vector& v )
    return result;
 }
 
-template< typename Vector >
-typename Vector::RealType
+template< typename Vector, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorAbsMin( const Vector& v )
 {
    //tady je možnost paralelizace
-   typename Vector::RealType result;
+   ResultType result;
    typename Vector::IndexType size=v.getSize();
    Devices::MICHider<const typename Vector::RealType > vct;
    vct.pointer=v.getData();
@@ -243,8 +243,8 @@ getVectorSum( const Vector& v )
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorDifferenceMax( const Vector1& v1,
                         const Vector2& v2 )
@@ -254,7 +254,7 @@ getVectorDifferenceMax( const Vector1& v1,
    TNL_ASSERT( v1. getSize() > 0, );
    TNL_ASSERT( v1. getSize() == v2. getSize(), );
 
-   Real result( 0.0 );
+   ResultType result( 0.0 );
    const Index n = v1. getSize();
    Devices::MICHider<const Real > vct1;
    Devices::MICHider<const Real > vct2;
@@ -270,8 +270,8 @@ getVectorDifferenceMax( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorDifferenceMin( const Vector1& v1,
                         const Vector2& v2 )
@@ -281,7 +281,7 @@ getVectorDifferenceMin( const Vector1& v1,
    TNL_ASSERT( v1. getSize() > 0, );
    TNL_ASSERT( v1. getSize() == v2. getSize(), );
 
-   Real result( 0.0 );
+   ResultType result( 0.0 );
    const Index n = v1. getSize();
    Devices::MICHider<const Real > vct1;
    Devices::MICHider<const Real > vct2;
@@ -297,8 +297,8 @@ getVectorDifferenceMin( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorDifferenceAbsMax( const Vector1& v1,
                            const Vector2& v2 )
@@ -308,7 +308,7 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    TNL_ASSERT( v1. getSize() > 0, );
    TNL_ASSERT( v1. getSize() == v2. getSize(), );
 
-   Real result( 0.0 );
+   ResultType result( 0.0 );
    const Index n = v1. getSize();
    Devices::MICHider<const Real > vct1;
    Devices::MICHider<const Real > vct2;
@@ -324,8 +324,8 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    return result;
 }
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getVectorDifferenceAbsMin( const Vector1& v1,
                            const Vector2& v2 )
@@ -335,7 +335,7 @@ getVectorDifferenceAbsMin( const Vector1& v1,
    TNL_ASSERT( v1. getSize() > 0, );
    TNL_ASSERT( v1. getSize() == v2. getSize(), );
 
-   Real result( 0.0 );
+   ResultType result( 0.0 );
    const Index n = v1. getSize();
    Devices::MICHider<const Real > vct1;
    Devices::MICHider<const Real > vct2;
@@ -499,8 +499,8 @@ vectorScalarMultiplication( Vector& v,
 }
 
 
-template< typename Vector1, typename Vector2 >
-typename Vector1::RealType
+template< typename Vector1, typename Vector2, typename ResultType >
+ResultType
 VectorOperations< Devices::MIC >::
 getScalarProduct( const Vector1& v1,
                   const Vector2& v2 )
@@ -510,7 +510,7 @@ getScalarProduct( const Vector1& v1,
    TNL_ASSERT( v1. getSize() > 0, );
    TNL_ASSERT( v1. getSize() == v2. getSize(), );
 
-   Real result( 0.0 );
+   ResultType result( 0.0 );
    const Index n = v1. getSize();
    Devices::MICHider<const Real > vct1;
    Devices::MICHider<const Real > vct2;
diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h
index e4a3461f5d06e7acf92a81ef08a3cdbaf9725a3e..f9aa3dfae9c8e69fbd33cbe0ea087c1500b2032b 100644
--- a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h
+++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h
@@ -9,14 +9,15 @@
 /* See Copyright Notice in tnl/Copyright */
 
 #pragma once
-   
+
 #include <iostream>
 
 #include <TNL/Math.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Exceptions/CudaBadAlloc.h>
 #include <TNL/Containers/Algorithms/ReductionOperations.h>
-   
+#include <TNL/Containers/Array.h>
+
 #ifdef HAVE_CUDA
 
 namespace TNL {
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index a8869cebe6fae1cf36a6e27e50af77d632f434c0..5605f7a9c27762a347e6c92116a104e5996fb4cd 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -23,24 +23,24 @@ template< int, typename > class StaticArray;
  * Array handles memory allocation and sharing of the same data between more Arrays.
  *
  */
-template< typename Element,
+template< typename Value,
           typename Device = Devices::Host,
           typename Index = int >
 class Array : public Object
 {
    public:
 
-      typedef Element ElementType;
+      typedef Value ValueType;
       typedef Device DeviceType;
       typedef Index IndexType;
-      typedef Containers::Array< Element, Devices::Host, Index > HostType;
-      typedef Containers::Array< Element, Devices::Cuda, Index > CudaType;
+      typedef Containers::Array< Value, Devices::Host, Index > HostType;
+      typedef Containers::Array< Value, Devices::Cuda, Index > CudaType;
 
       Array();
 
       Array( const IndexType& size );
 
-      Array( Element* data,
+      Array( Value* data,
              const IndexType& size );
 
       Array( Array& array,
@@ -67,7 +67,7 @@ class Array : public Object
       template< typename ArrayT >
       void setLike( const ArrayT& array );
 
-      void bind( Element* _data,
+      void bind( Value* _data,
                  const Index _size );
 
       template< typename ArrayT >
@@ -76,25 +76,23 @@ class Array : public Object
                  const IndexType& size = 0 );
 
       template< int Size >
-      void bind( StaticArray< Size, Element >& array );
+      void bind( StaticArray< Size, Value >& array );
 
       void swap( Array& array );
 
       void reset();
 
-      void setElement( const Index& i, const Element& x );
+      __cuda_callable__ const Value* getData() const;
 
-      Element getElement( const Index& i ) const;
+      __cuda_callable__ Value* getData();
 
-      // Checks if there is an element with value v in this array
-      bool containsValue( const Element& v ) const;
+      void setElement( const Index& i, const Value& x );
 
-      // Checks if all elements in this array have the same value v
-      bool containsOnlyValue( const Element& v ) const;
+      Value getElement( const Index& i ) const;
 
-      __cuda_callable__ inline Element& operator[] ( const Index& i );
+      __cuda_callable__ inline Value& operator[] ( const Index& i );
 
-      __cuda_callable__ inline const Element& operator[] ( const Index& i ) const;
+      __cuda_callable__ inline const Value& operator[] ( const Index& i ) const;
 
       Array& operator = ( const Array& array );
 
@@ -107,25 +105,19 @@ class Array : public Object
       template< typename ArrayT >
       bool operator != ( const ArrayT& array ) const;
 
-      void setValue( const Element& e );
+      void setValue( const Value& v );
 
-      __cuda_callable__ const Element* getData() const;
+      // Checks if there is an element with value v in this array
+      bool containsValue( const Value& v ) const;
 
-      __cuda_callable__ Element* getData();
+      // Checks if all elements in this array have the same value v
+      bool containsOnlyValue( const Value& v ) const;
 
       /*!
        * Returns true if non-zero size is set.
        */
       operator bool() const;
 
-      //! This method measures data transfers done by this vector.
-      /*!
-       * Every time one touches this grid touches * size * sizeof( Real ) bytes are added
-       * to transfered bytes in tnlStatistics.
-       */
-      template< typename IndexType2 = Index >
-      void touch( IndexType2 touches = 1 ) const;
-
       //! Method for saving the object to a file as a binary data.
       bool save( File& file ) const;
 
@@ -157,7 +149,7 @@ class Array : public Object
       mutable Index size;
 
       //! Pointer to data
-      mutable Element* data;
+      mutable Value* data;
 
       /****
        * Pointer to the originally allocated data. They might differ if one
@@ -166,7 +158,7 @@ class Array : public Object
        * deallocate the array. If outer data (not allocated by TNL) are bind
        * then this pointer is zero since no deallocation is necessary.
        */
-      mutable Element* allocationPointer;
+      mutable Value* allocationPointer;
 
       /****
        * Counter of objects sharing this array or some parts of it. The reference counter is
@@ -176,8 +168,8 @@ class Array : public Object
       mutable int* referenceCounter;
 };
 
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const Array< Element, Device, Index >& v );
+template< typename Value, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const Array< Value, Device, Index >& v );
 
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/ArrayIO.h b/src/TNL/Containers/ArrayIO.h
index 0b79cf9d1db0444f81dea124ca61b7d5f1e41f3e..ad8e43be460e5d5f1fe917ed7f06a94165e92254 100644
--- a/src/TNL/Containers/ArrayIO.h
+++ b/src/TNL/Containers/ArrayIO.h
@@ -18,22 +18,22 @@
 namespace TNL {
 namespace Containers {
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index,
-          bool Elementwise = std::is_base_of< Object, Element >::value >
+          bool Elementwise = std::is_base_of< Object, Value >::value >
 class ArrayIO
 {};
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-class ArrayIO< Element, Device, Index, true >
+class ArrayIO< Value, Device, Index, true >
 {
    public:
 
    static bool save( File& file,
-                     const Element* data,
+                     const Value* data,
                      const Index elements )
    {
       for( Index i = 0; i < elements; i++ )
@@ -46,7 +46,7 @@ class ArrayIO< Element, Device, Index, true >
    }
 
    static bool load( File& file,
-                     Element* data,
+                     Value* data,
                      const Index elements )
    {
       for( Index i = 0; i < elements; i++ )
@@ -59,25 +59,25 @@ class ArrayIO< Element, Device, Index, true >
    }
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-class ArrayIO< Element, Device, Index, false >
+class ArrayIO< Value, Device, Index, false >
 {
    public:
 
    static bool save( File& file,
-                     const Element* data,
+                     const Value* data,
                      const Index elements )
    {
-      return file.write< Element, Device, Index >( data, elements );
+      return file.write< Value, Device, Index >( data, elements );
    }
 
    static bool load( File& file,
-                     Element* data,
+                     Value* data,
                      const Index elements )
    {
-      return file.read< Element, Device, Index >( data, elements );
+      return file.read< Value, Device, Index >( data, elements );
    }
 
 };
diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
new file mode 100644
index 0000000000000000000000000000000000000000..05f00a418c289d900b90f6a3608d5ffd981e3e21
--- /dev/null
+++ b/src/TNL/Containers/ArrayView.h
@@ -0,0 +1,159 @@
+/***************************************************************************
+                          ArrayView.h  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+namespace TNL {
+namespace Containers {
+
+template< typename Value, typename Device, typename Index >
+class Array;
+
+template< int Size, typename Value >
+class StaticArray;
+
+template< typename Value,
+          typename Device = Devices::Host,
+          typename Index = int >
+class ArrayView
+{
+public:
+   using ValueType = Value;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using HostType = ArrayView< Value, Devices::Host, Index >;
+   using CudaType = ArrayView< Value, Devices::Cuda, Index >;
+
+   __cuda_callable__
+   ArrayView() = default;
+
+   // explicit initialization by raw data pointer and size
+   __cuda_callable__
+   ArrayView( Value* data, Index size );
+
+   // Copy-constructor does shallow copy, so views can be passed-by-value into
+   // CUDA kernels and they can be captured-by-value in __cuda_callable__
+   // lambda functions.
+   __cuda_callable__
+   ArrayView( const ArrayView& ) = default;
+
+   // "Templated copy-constructor" accepting any cv-qualification of Value
+   template< typename Value_ >
+   __cuda_callable__
+   ArrayView( ArrayView< Value_, Device, Index >& array )
+   : data(array.getData()), size(array.getSize()) {}
+
+   // default move-constructor
+   __cuda_callable__
+   ArrayView( ArrayView&& ) = default;
+
+   // initialization from other array containers (using shallow copy)
+   template< typename Value_ >  // template catches both const and non-const qualified Value
+   __cuda_callable__
+   ArrayView( Array< Value_, Device, Index >& array );
+
+   template< int Size, typename Value_ >  // template catches both const and non-const qualified Value
+   __cuda_callable__
+   ArrayView( StaticArray< Size, Value_ >& array );
+
+   // these constructors will be used only when Value is const-qualified
+   // (const views are initializable by const references)
+   template< typename Value_ >  // template catches both const and non-const qualified Value
+   __cuda_callable__
+   ArrayView( const Array< Value_, Device, Index >& array );
+
+   template< int Size, typename Value_ >  // template catches both const and non-const qualified Value
+   __cuda_callable__
+   ArrayView( const StaticArray< Size, Value_ >& array );
+
+
+   // methods for rebinding (reinitialization)
+   __cuda_callable__
+   void bind( Value* data, const Index size );
+
+   // Note that you can also bind directly to Array and other types implicitly
+   // convertible to ArrayView.
+   __cuda_callable__
+   void bind( ArrayView view );
+
+
+   // Copy-assignment does deep copy, just like regular array, but the sizes
+   // must match (i.e. copy-assignment cannot resize).
+   ArrayView& operator=( const ArrayView& view );
+
+   template< typename Value_, typename Device_, typename Index_ >
+   ArrayView& operator=( const ArrayView< Value_, Device_, Index_ >& view );
+
+
+   static String getType();
+
+
+   __cuda_callable__
+   void swap( ArrayView& view );
+
+   __cuda_callable__
+   void reset();
+
+   __cuda_callable__
+   const Value* getData() const;
+
+   __cuda_callable__
+   Value* getData();
+
+   __cuda_callable__
+   Index getSize() const;
+
+   void setElement( Index i, Value value );
+
+   Value getElement( Index i ) const;
+
+   __cuda_callable__
+   Value& operator[]( Index i );
+
+   __cuda_callable__
+   const Value& operator[]( Index i ) const;
+
+   template< typename Value_, typename Device_, typename Index_ >
+   bool operator==( const ArrayView< Value_, Device_, Index_ >& view ) const;
+
+   template< typename Value_, typename Device_, typename Index_ >
+   bool operator!=( const ArrayView< Value_, Device_, Index_ >& view ) const;
+
+   void setValue( Value value );
+
+   // Checks if there is an element with given value in this array
+   bool containsValue( Value value ) const;
+
+   // Checks if all elements in this array have the same given value
+   bool containsOnlyValue( Value value ) const;
+
+   //! Returns true if non-zero size is set.
+   operator bool() const;
+
+protected:
+   //! Pointer to the viewed data (not owned by the view)
+   Value* data = nullptr;
+
+   //! Number of elements in the view
+   Index size = 0;
+};
+
+template< typename Value, typename Device, typename Index >
+std::ostream& operator<<( std::ostream& str, const ArrayView< Value, Device, Index >& v );
+
+} // namespace Containers
+} // namespace TNL
+
+#include <TNL/Containers/ArrayView_impl.h>
diff --git a/src/TNL/Containers/ArrayView_impl.h b/src/TNL/Containers/ArrayView_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..d755c4c3111812df4a00f8f633c44f4736c0aefc
--- /dev/null
+++ b/src/TNL/Containers/ArrayView_impl.h
@@ -0,0 +1,341 @@
+/***************************************************************************
+                          ArrayView_impl.h  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <iostream>
+
+#include <TNL/param-types.h>
+#include <TNL/Containers/Algorithms/ArrayOperations.h>
+
+#include "ArrayView.h"
+
+namespace TNL {
+namespace Containers {
+
+// explicit initialization by raw data pointer and size
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+ArrayView< Value, Device, Index >::
+ArrayView( Value* data, Index size ) : data(data), size(size)
+{
+   TNL_ASSERT_GE( size, 0, "ArrayView size was initialized with a negative size." );
+   TNL_ASSERT_TRUE( (data == nullptr && size == 0) || (data != nullptr && size > 0),
+                    "ArrayView was initialized with a positive address and zero size or zero address and positive size." );
+}
+
+// initialization from other array containers (using shallow copy)
+template< typename Value,
+          typename Device,
+          typename Index >
+   template< typename Value_ >
+__cuda_callable__
+ArrayView< Value, Device, Index >::
+ArrayView( Array< Value_, Device, Index >& array )
+{
+   this->bind( array.getData(), array.getSize() );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+   template< int Size, typename Value_ >
+__cuda_callable__
+ArrayView< Value, Device, Index >::
+ArrayView( StaticArray< Size, Value_ >& array )
+{
+   this->bind( array.getData(), Size );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+   template< typename Value_ >
+__cuda_callable__
+ArrayView< Value, Device, Index >::
+ArrayView( const Array< Value_, Device, Index >& array )
+{
+   this->bind( array.getData(), array.getSize() );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+   template< int Size, typename Value_ >
+__cuda_callable__
+ArrayView< Value, Device, Index >::
+ArrayView( const StaticArray< Size, Value_ >& array )
+{
+   this->bind( array.getData(), Size );
+}
+
+// methods for rebinding (reinitialization)
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+void
+ArrayView< Value, Device, Index >::
+bind( Value* data, Index size )
+{
+   TNL_ASSERT_GE( size, 0, "ArrayView size was initialized with a negative size." );
+   TNL_ASSERT_TRUE( (data == nullptr && size == 0) || (data != nullptr && size > 0),
+                    "ArrayView was initialized with a positive address and zero size or zero address and positive size." );
+
+   this->data = data;
+   this->size = size;
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+void ArrayView< Value, Device, Index >::bind( ArrayView view )
+{
+   bind( view.getData(), view.getSize() );
+}
+
+
+// Copy-assignment does deep copy, just like regular array, but the sizes
+// must match (i.e. copy-assignment cannot resize).
+template< typename Value,
+           typename Device,
+           typename Index >
+ArrayView< Value, Device, Index >&
+ArrayView< Value, Device, Index >::
+operator=( const ArrayView& view )
+{
+   TNL_ASSERT_EQ( getSize(), view.getSize(), "The sizes of the array views must be equal, views are not resizable." );
+   if( getSize() > 0 )
+      Algorithms::ArrayOperations< Device >::copyMemory( getData(), view.getData(), getSize() );
+   return *this;
+}
+
+template< typename Value,
+           typename Device,
+           typename Index >
+   template< typename Value_, typename Device_, typename Index_ >
+ArrayView< Value, Device, Index >&
+ArrayView< Value, Device, Index >::
+operator=( const ArrayView< Value_, Device_, Index_ >& view )
+{
+   TNL_ASSERT_EQ( getSize(), view.getSize(), "The sizes of the array views must be equal, views are not resizable." );
+   if( getSize() > 0 )
+      Algorithms::ArrayOperations< Device, Device_ >::copyMemory( getData(), view.getData(), getSize() );
+   return *this;
+}
+
+
+// Returns the type name in the form "Containers::ArrayView< Value, Device, Index >",
+// built from TNL::getType and Device::getDeviceType (no separator before the first argument).
+template< typename Value, typename Device, typename Index >
+String
+ArrayView< Value, Device, Index >::
+getType()
+{
+   return String( "Containers::ArrayView< " ) +
+          TNL::getType< Value >() + ", " +
+          Device::getDeviceType() + ", " +
+          TNL::getType< Index >() + " >";
+}
+
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+void
+ArrayView< Value, Device, Index >::
+swap( ArrayView& array )
+{
+   TNL::swap( data, array.data );
+   TNL::swap( size, array.size );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+void
+ArrayView< Value, Device, Index >::
+reset()
+{
+   data = nullptr;
+   size = 0;
+}
+
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const
+Value* ArrayView< Value, Device, Index >::
+getData() const
+{
+   return data;
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Value*
+ArrayView< Value, Device, Index >::
+getData()
+{
+   return data;
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index
+ArrayView< Value, Device, Index >::
+getSize() const
+{
+   return size;
+}
+
+// Writes `value` into position `i` of the view; the store itself is delegated
+// to Algorithms::ArrayOperations< Device >::setMemoryElement.
+template< typename Value, typename Device, typename Index >
+void
+ArrayView< Value, Device, Index >::
+setElement( Index i, Value value )
+{
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   Algorithms::ArrayOperations< Device >::setMemoryElement( data + i, value );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+Value
+ArrayView< Value, Device, Index >::
+getElement( Index i ) const
+{
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   return Algorithms::ArrayOperations< Device >::getMemoryElement( &data[ i ] );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Value& ArrayView< Value, Device, Index >::
+operator[]( Index i )
+{
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   return data[ i ];
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const
+Value& ArrayView< Value, Device, Index >::
+operator[]( Index i ) const
+{
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   return data[ i ];
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+   template< typename Value_, typename Device_, typename Index_ >
+bool
+ArrayView< Value, Device, Index >::
+operator==( const ArrayView< Value_, Device_, Index_ >& view ) const
+{
+   if( view.getSize() != getSize() )
+      return false;
+   if( getSize() == 0 )
+      return true;
+   return Algorithms::ArrayOperations< Device, Device_ >::compareMemory( getData(), view.getData(), getSize() );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+   template< typename Value_, typename Device_, typename Index_ >
+bool
+ArrayView< Value, Device, Index >::
+operator!=( const ArrayView< Value_, Device_, Index_ >& view ) const
+{
+   return ! ( *this == view );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+void
+ArrayView< Value, Device, Index >::
+setValue( Value value )
+{
+   TNL_ASSERT_GT( size, 0, "Attempted to set value to an empty array view." );
+   Algorithms::ArrayOperations< Device >::setMemory( getData(), value, getSize() );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+bool
+ArrayView< Value, Device, Index >::
+containsValue( Value value ) const
+{
+   return Algorithms::ArrayOperations< Device >::containsValue( data, size, value );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+bool
+ArrayView< Value, Device, Index >::
+containsOnlyValue( Value value ) const
+{
+   return Algorithms::ArrayOperations< Device >::containsOnlyValue( data, size, value );
+}
+
+template< typename Value,
+          typename Device,
+          typename Index >
+ArrayView< Value, Device, Index >::
+operator bool() const
+{
+   return data;
+}
+
+
+// Prints the view as a bracketed, comma-separated list of its elements.
+template< typename Value, typename Device, typename Index >
+std::ostream& operator<<( std::ostream& str, const ArrayView< Value, Device, Index >& v )
+{
+   str << "[ ";
+   for( Index i = 0; i < v.getSize(); i++ )
+   {
+      if( i > 0 )
+         str << ", ";
+      str << v.getElement( i );
+   }
+   return str << " ]";
+}
+
+} // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Array_impl.cpp b/src/TNL/Containers/Array_impl.cpp
deleted file mode 100644
index c951e0526badefc0806979821e2b0decfbdc2cdf..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Array_impl.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/***************************************************************************
-                          Array_impl.cpp  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#include <TNL/Containers/Array.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifdef INSTANTIATE_FLOAT
-template class Array< float, Devices::Host, int >;
-#endif
-template class Array< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Array< long double, Devices::Host, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class Array< float, Devices::Host, long int >;
-#endif
-template class Array< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Array< long double, Devices::Host, long int >;
-#endif
-#endif
-
-#ifndef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class Array< float, Devices::Cuda, int >;
-#endif
-template class Array< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Array< long double, Devices::Cuda, int >;
-#endif
-
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class Array< float, Devices::Cuda, long int >;
-#endif
-template class Array< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Array< long double, Devices::Cuda, long int >;
-#endif
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
-#endif // #ifdef TEMPLATE_EXPLICIT_INSTANTIATION
diff --git a/src/TNL/Containers/Array_impl.cu b/src/TNL/Containers/Array_impl.cu
deleted file mode 100644
index 5c43ec36e69ccb02b2b9950d85e27f9271192b3d..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Array_impl.cu
+++ /dev/null
@@ -1,43 +0,0 @@
-/***************************************************************************
-                          Array_impl.cu  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#include <TNL/Containers/Array.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class Array< float, Devices::Cuda, int >;
-#endif
-template class Array< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Array< long double, Devices::Cuda, int >;
-#endif
-
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class Array< float, Devices::Cuda, long int >;
-#endif
-template class Array< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Array< long double, Devices::Cuda, long int >;
-#endif
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
-#endif // #ifdef TEMPLATE_EXPLICIT_INSTANTIATION
diff --git a/src/TNL/Containers/Array_impl.h b/src/TNL/Containers/Array_impl.h
index 0d9d7ac3553b8af2cb2b97cc63ba9ad01c8a12cf..8502650b56e7352ece207a1a4138271a366453bf 100644
--- a/src/TNL/Containers/Array_impl.h
+++ b/src/TNL/Containers/Array_impl.h
@@ -22,10 +22,10 @@
 namespace TNL {
 namespace Containers {
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 Array()
 : size( 0 ),
   data( 0 ),
@@ -34,10 +34,10 @@ Array()
 {
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 Array( const IndexType& size )
 : size( 0 ),
   data( 0 ),
@@ -47,11 +47,11 @@ Array( const IndexType& size )
    this->setSize( size );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >::
-Array( Element* data,
+Array< Value, Device, Index >::
+Array( Value* data,
        const IndexType& size )
 : size( size ),
   data( data ),
@@ -60,11 +60,11 @@ Array( Element* data,
 {
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >::
-Array( Array< Element, Device, Index >& array,
+Array< Value, Device, Index >::
+Array( Array< Value, Device, Index >& array,
        const IndexType& begin,
        const IndexType& size )
 : size( size ),
@@ -92,54 +92,54 @@ Array( Array< Element, Device, Index >& array,
    }
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 String
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 getType()
 {
    return String( "Containers::Array< " ) +
-          TNL::getType< Element >() + ", " +
+          TNL::getType< Value >() + ", " +
           Device::getDeviceType() + ", " +
           TNL::getType< Index >() + " >";
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 String
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 getTypeVirtual() const
 {
    return this->getType();
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 String
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 getSerializationType()
 {
    return HostType::getType();
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 String
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 void
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 releaseData() const
 {
    if( this->referenceCounter )
@@ -160,11 +160,11 @@ releaseData() const
    this->referenceCounter = 0;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 void
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 setSize( const Index size )
 {
    TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
@@ -184,34 +184,34 @@ setSize( const Index size )
    }
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 __cuda_callable__
 Index
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 getSize() const
 {
    return this -> size;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
    template< typename ArrayT >
 void
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 setLike( const ArrayT& array )
 {
    setSize( array.getSize() );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 void
-Array< Element, Device, Index >::
-bind( Element* data,
+Array< Value, Device, Index >::
+bind( Value* data,
       const Index size )
 {
    TNL_ASSERT_TRUE( data, "Null pointer cannot be bound." );
@@ -220,18 +220,18 @@ bind( Element* data,
    this->size = size;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
    template< typename ArrayT >
 void
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 bind( const ArrayT& array,
       const IndexType& begin,
       const IndexType& size )
 {
    // all template parameters of Array must match, otherwise binding does not make sense
-   static_assert( std::is_same< Element, typename ArrayT::ElementType >::value, "ElementType of both arrays must be the same." );
+   static_assert( std::is_same< Value, typename ArrayT::ValueType >::value, "ValueType of both arrays must be the same." );
    static_assert( std::is_same< Device, typename ArrayT::DeviceType >::value, "DeviceType of both arrays must be the same." );
    static_assert( std::is_same< Index, typename ArrayT::IndexType >::value, "IndexType of both arrays must be the same." );
    TNL_ASSERT_TRUE( array.getData(), "Empty array cannot be bound." );
@@ -243,7 +243,7 @@ bind( const ArrayT& array,
       this->size = size;
    else
       this->size = array.getSize() - begin;
-   this->data = const_cast< Element* >( &array.getData()[ begin ] );
+   this->data = const_cast< Value* >( &array.getData()[ begin ] );
    this->allocationPointer = array.allocationPointer;
    if( array.allocationPointer )
    {
@@ -260,13 +260,13 @@ bind( const ArrayT& array,
    }
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
    template< int Size >
 void
-Array< Element, Device, Index >::
-bind( StaticArray< Size, Element >& array )
+Array< Value, Device, Index >::
+bind( StaticArray< Size, Value >& array )
 {
    this->releaseData();
    this->size = Size;
@@ -274,12 +274,12 @@ bind( StaticArray< Size, Element >& array )
 }
 
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 void
-Array< Element, Device, Index >::
-swap( Array< Element, Device, Index >& array )
+Array< Value, Device, Index >::
+swap( Array< Value, Device, Index >& array )
 {
    TNL::swap( this->size, array.size );
    TNL::swap( this->data, array.data );
@@ -287,66 +287,64 @@ swap( Array< Element, Device, Index >& array )
    TNL::swap( this->referenceCounter, array.referenceCounter );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 void
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 reset()
 {
    this->releaseData();
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-void
-Array< Element, Device, Index >::
-setElement( const Index& i, const Element& x )
+__cuda_callable__
+const Value* Array< Value, Device, Index >::getData() const
 {
-   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
-   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-   return Algorithms::ArrayOperations< Device >::setMemoryElement( &( this->data[ i ] ), x );
+   return this->data;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Element
-Array< Element, Device, Index >::
-getElement( const Index& i ) const
+__cuda_callable__
+Value* Array< Value, Device, Index >::getData()
 {
-   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
-   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-   return Algorithms::ArrayOperations< Device >::getMemoryElement( & ( this->data[ i ] ) );
+   return this->data;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-bool
-Array< Element, Device, Index >::
-containsValue( const Element& v ) const
+void
+Array< Value, Device, Index >::
+setElement( const Index& i, const Value& x )
 {
-   return Algorithms::ArrayOperations< Device >::containsValue( this->data, this->size, v );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   return Algorithms::ArrayOperations< Device >::setMemoryElement( &( this->data[ i ] ), x );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-bool
-Array< Element, Device, Index >::
-containsOnlyValue( const Element& v ) const
+Value
+Array< Value, Device, Index >::
+getElement( const Index& i ) const
 {
-   return Algorithms::ArrayOperations< Device >::containsOnlyValue( this->data, this->size, v );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   return Algorithms::ArrayOperations< Device >::getMemoryElement( & ( this->data[ i ] ) );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 __cuda_callable__
-inline Element&
-Array< Element, Device, Index >::
+inline Value&
+Array< Value, Device, Index >::
 operator[] ( const Index& i )
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
@@ -354,12 +352,12 @@ operator[] ( const Index& i )
    return this->data[ i ];
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 __cuda_callable__
-inline const Element&
-Array< Element, Device, Index >::
+inline const Value&
+Array< Value, Device, Index >::
 operator[] ( const Index& i ) const
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
@@ -367,12 +365,12 @@ operator[] ( const Index& i ) const
    return this->data[ i ];
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >&
-Array< Element, Device, Index >::
-operator = ( const Array< Element, Device, Index >& array )
+Array< Value, Device, Index >&
+Array< Value, Device, Index >::
+operator = ( const Array< Value, Device, Index >& array )
 {
    //TNL_ASSERT_EQ( array.getSize(), this->getSize(), "Array sizes must be the same." );
    if( this->getSize() != array.getSize() )
@@ -385,17 +383,17 @@ operator = ( const Array< Element, Device, Index >& array )
    return ( *this );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
    template< typename ArrayT >
-Array< Element, Device, Index >&
-Array< Element, Device, Index >::
+Array< Value, Device, Index >&
+Array< Value, Device, Index >::
 operator = ( const ArrayT& array )
 {
    //TNL_ASSERT_EQ( array.getSize(), this->getSize(), "Array sizes must be the same." );
    if( this->getSize() != array.getSize() )
-      this->setLike( array );   
+      this->setLike( array );
    if( this->getSize() > 0 )
       Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >::
          copyMemory( this->getData(),
@@ -404,12 +402,12 @@ operator = ( const ArrayT& array )
    return ( *this );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
    template< typename ArrayT >
 bool
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 operator == ( const ArrayT& array ) const
 {
    if( array.getSize() != this->getSize() )
@@ -422,71 +420,63 @@ operator == ( const ArrayT& array ) const
                            array.getSize() );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
    template< typename ArrayT >
-bool Array< Element, Device, Index >::operator != ( const ArrayT& array ) const
+bool Array< Value, Device, Index >::operator != ( const ArrayT& array ) const
 {
    return ! ( ( *this ) == array );
 }
 
-
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-void Array< Element, Device, Index >::setValue( const Element& e )
+void Array< Value, Device, Index >::setValue( const Value& e )
 {
    TNL_ASSERT_TRUE( this->getData(), "Attempted to set a value of an empty array." );
    Algorithms::ArrayOperations< Device >::setMemory( this->getData(), e, this->getSize() );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-__cuda_callable__
-const Element* Array< Element, Device, Index >::getData() const
+bool
+Array< Value, Device, Index >::
+containsValue( const Value& v ) const
 {
-   return this -> data;
+   return Algorithms::ArrayOperations< Device >::containsValue( this->data, this->size, v );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-__cuda_callable__
-Element* Array< Element, Device, Index >::getData()
+bool
+Array< Value, Device, Index >::
+containsOnlyValue( const Value& v ) const
 {
-   return this -> data;
+   return Algorithms::ArrayOperations< Device >::containsOnlyValue( this->data, this->size, v );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >::operator bool() const
+Array< Value, Device, Index >::operator bool() const
 {
    return data != 0;
 }
 
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-   template< typename IndexType2 >
-void Array< Element, Device, Index >::touch( IndexType2 touches ) const
-{
-   //TODO: implement
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-bool Array< Element, Device, Index >::save( File& file ) const
+bool Array< Value, Device, Index >::save( File& file ) const
 {
    if( ! Object::save( file ) )
       return false;
    if( ! file.write( &this->size ) )
       return false;
-   if( this->size != 0 && ! ArrayIO< Element, Device, Index >::save( file, this->data, this->size ) )
+   if( this->size != 0 && ! ArrayIO< Value, Device, Index >::save( file, this->data, this->size ) )
    {
       std::cerr << "I was not able to save " << this->getType()
            << " with size " << this -> getSize() << std::endl;
@@ -495,11 +485,11 @@ bool Array< Element, Device, Index >::save( File& file ) const
    return true;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 bool
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 load( File& file )
 {
    if( ! Object::load( file ) )
@@ -518,7 +508,7 @@ load( File& file )
    setSize( _size );
    if( _size )
    {
-      if( ! ArrayIO< Element, Device, Index >::load( file, this->data, this->size ) )
+      if( ! ArrayIO< Value, Device, Index >::load( file, this->data, this->size ) )
       {
          std::cerr << "I was not able to load " << this->getType()
                     << " with size " << this -> getSize() << std::endl;
@@ -528,11 +518,11 @@ load( File& file )
    return true;
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
 bool
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 boundLoad( File& file )
 {
    if( ! Object::load( file ) )
@@ -557,7 +547,7 @@ boundLoad( File& file )
    else setSize( _size );
    if( _size )
    {
-      if( ! ArrayIO< Element, Device, Index >::load( file, this->data, this->size ) )
+      if( ! ArrayIO< Value, Device, Index >::load( file, this->data, this->size ) )
       {
          std::cerr << "I was not able to load " << this->getType()
                    << " with size " << this -> getSize() << std::endl;
@@ -568,17 +558,17 @@ boundLoad( File& file )
 }
 
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-Array< Element, Device, Index >::
+Array< Value, Device, Index >::
 ~Array()
 {
    this->releaseData();
 }
 
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const Array< Element, Device, Index >& v )
+template< typename Value, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const Array< Value, Device, Index >& v )
 {
    str << "[ ";
    if( v.getSize() > 0 )
@@ -591,42 +581,5 @@ std::ostream& operator << ( std::ostream& str, const Array< Element, Device, Ind
    return str;
 }
 
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-// TODO: this does not work with CUDA 5.5 - fix it later
-
-#ifdef INSTANTIATE_FLOAT
-extern template class Array< float, Devices::Host, int >;
-#endif
-extern template class Array< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class Array< long double, Devices::Host, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class Array< float, Devices::Host, long int >;
-#endif
-extern template class Array< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class Array< long double, Devices::Host, long int >;
-#endif
-#endif
-
-#ifdef HAVE_CUDA
-/*
- #ifdef INSTANTIATE_FLOAT
- extern template class Array< float, Devices::Cuda, int >;
- #endif
- extern template class Array< double, Devices::Cuda, int >;
- #ifdef INSTANTIATE_FLOAT
- extern template class Array< float, Devices::Cuda, long int >;
- #endif
- extern template class Array< double, Devices::Cuda, long int >;*/
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/CMakeLists.txt b/src/TNL/Containers/CMakeLists.txt
index 16c3230587508c806861b113dbbea918743c0c6c..f53155604c78f7841cc4c35f0c7940b2b5141711 100644
--- a/src/TNL/Containers/CMakeLists.txt
+++ b/src/TNL/Containers/CMakeLists.txt
@@ -17,8 +17,8 @@ set( headers Array.h
              MultiArray2D_impl.h
              MultiArray3D_impl.h
              MultiArray4D_impl.h
-             SharedArray.h
-             SharedArray_impl.h
+             ArrayView.h
+             ArrayView_impl.h
              StaticArray.h
              StaticArray_impl.h
              StaticArray1D_impl.h
@@ -31,8 +31,8 @@ set( headers Array.h
              MultiVector2D_impl.h
              MultiVector3D_impl.h
              MultiVector4D_impl.h
-             SharedVector.h
-             SharedVector_impl.h
+             VectorView.h
+             VectorView_impl.h
              StaticVector.h
              StaticVector_impl.h
              StaticVector1D_impl.h
@@ -40,30 +40,3 @@ set( headers Array.h
              StaticVector3D_impl.h  )
 
 INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Containers )
-
-if( ${WITH_TEMPLATES_INSTANTIATION} )
-   SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Containers )
-   set( common_SOURCES
-        ${CURRENT_DIR}/MultiArray_impl.cpp
-        ${CURRENT_DIR}/Array_impl.cpp
-        ${CURRENT_DIR}/StaticArray_impl.cpp
-        ${CURRENT_DIR}/MultiVector_impl.cpp
-        ${CURRENT_DIR}/Vector_impl.cpp
-        ${CURRENT_DIR}/StaticVector_impl.cpp
-   )
-
-   if( BUILD_CUDA )
-      set( tnl_containers_CUDA__SOURCES
-           ${common_SOURCES}
-           ${CURRENT_DIR}/Array_impl.cu
-           ${CURRENT_DIR}/MultiArray_impl.cu
-           ${CURRENT_DIR}/StaticArray_impl.cu
-           ${CURRENT_DIR}/Vector_impl.cu
-           ${CURRENT_DIR}/StaticVector_impl.cu
-           PARENT_SCOPE )
-   endif()
-
-   set( tnl_containers_SOURCES
-        ${common_SOURCES}
-        PARENT_SCOPE )
-endif()
diff --git a/src/TNL/Containers/IndexedMap.h b/src/TNL/Containers/IndexedMap.h
index 2c0e2d99471726fd08ee4c2529c8ca64ce63925d..0beac82b3bd77dce07b042c650eb8057e9fefea9 100644
--- a/src/TNL/Containers/IndexedMap.h
+++ b/src/TNL/Containers/IndexedMap.h
@@ -11,70 +11,67 @@
 #pragma once
 
 #include <map>
-#include <stdexcept>
+#include <iostream>
 
 namespace TNL {
 namespace Containers {
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
 class IndexedMap
 {
-   public:
-
-   typedef Element   ElementType;
-   typedef Index     IndexType;
-   typedef Key       KeyType;
+public:
+   using ValueType = Value;
+   using IndexType = Index;
+   using KeyType = Key;
 
    void reset();
 
    IndexType getSize() const;
 
-   IndexType insert( const ElementType &data );
+   IndexType insert( const ValueType &data );
 
-   bool find( const ElementType &data, IndexType& index ) const;
+   bool find( const ValueType &data, IndexType& index ) const;
 
    template< typename ArrayType >
    void toArray( ArrayType& array ) const;
 
-   const Element& getElement( KeyType key ) const;
+   const Value& getElement( KeyType key ) const;
 
-   Element& getElement( KeyType key );
- 
-   void print( std::ostream& str ) const;
+   Value& getElement( KeyType key );
 
-   protected:
+   void print( std::ostream& str ) const;
 
+protected:
    struct DataWithIndex
    {
       // This constructor is here only because of bug in g++, we might fix it later.
       // http://stackoverflow.com/questions/22357887/comparing-two-mapiterators-why-does-it-need-the-copy-constructor-of-stdpair
       DataWithIndex(){};
- 
+
       DataWithIndex( const DataWithIndex& d ) : data( d.data ), index( d.index) {}
- 
-      explicit DataWithIndex( const Element data) : data( data ) {}
 
-      DataWithIndex( const Element data,
+      explicit DataWithIndex( const Value data) : data( data ) {}
+
+      DataWithIndex( const Value data,
                      const Index index) : data(data), index(index) {}
 
-      Element data;
+      Value data;
       Index index;
    };
 
-   typedef std::map< Key, DataWithIndex >      STDMapType;
-   typedef typename STDMapType::value_type     STDMapValueType;
-   typedef typename STDMapType::const_iterator STDMapIteratorType;
+   using STDMapType = std::map< Key, DataWithIndex >;
+   using STDMapValueType = typename STDMapType::value_type;
+   using STDMapIteratorType = typename STDMapType::const_iterator;
 
    STDMapType map;
-
 };
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-std::ostream& operator <<( std::ostream& str, IndexedMap< Element, Index, Key >& set );
+std::ostream& operator <<( std::ostream& str, IndexedMap< Value, Index, Key >& set );
 
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/IndexedMap_impl.h b/src/TNL/Containers/IndexedMap_impl.h
index 488b3b93bcc4716bec28375232130f21d62a58a2..383a7a445a126501b2d31ee02455f1d41b0a9a26 100644
--- a/src/TNL/Containers/IndexedMap_impl.h
+++ b/src/TNL/Containers/IndexedMap_impl.h
@@ -10,39 +10,41 @@
 
 #pragma once
 
+#include "IndexedMap.h"
+
 namespace TNL {
 namespace Containers {
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-void IndexedMap< Element, Index, Key >::reset()
+void IndexedMap< Value, Index, Key >::reset()
 {
    map.clear();
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-Index IndexedMap< Element, Index, Key >::getSize() const
+Index IndexedMap< Value, Index, Key >::getSize() const
 {
    return map.size();
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-Index IndexedMap< Element, Index, Key >::insert( const Element &data )
+Index IndexedMap< Value, Index, Key >::insert( const Value &data )
 {
    STDMapIteratorType iter = map.insert( STDMapValueType( Key( data ),
                                          DataWithIndex( data, getSize() ) ) ).first;
    return iter->second.index;
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-bool IndexedMap< Element, Index, Key >::find( const Element &data, Index& index ) const
+bool IndexedMap< Value, Index, Key >::find( const Value &data, Index& index ) const
 {
    STDMapIteratorType iter = map.find( Key( data ) );
    if (iter == map.end())
@@ -51,11 +53,11 @@ bool IndexedMap< Element, Index, Key >::find( const Element &data, Index& index
    return true;
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
    template<typename ArrayType>
-void IndexedMap< Element, Index, Key >::toArray( ArrayType& array ) const
+void IndexedMap< Value, Index, Key >::toArray( ArrayType& array ) const
 {
    TNL_ASSERT( array.getSize() == getSize(),
                std::cerr << "array.getSize() = " << array.getSize()
@@ -67,26 +69,26 @@ void IndexedMap< Element, Index, Key >::toArray( ArrayType& array ) const
       array[ iter->second.index ] = iter->second.data;
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-const Element& IndexedMap< Element, Index, Key >::getElement( KeyType key ) const
+const Value& IndexedMap< Value, Index, Key >::getElement( KeyType key ) const
 {
    return map[ key ];
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-Element& IndexedMap< Element, Index, Key >::getElement( KeyType key )
+Value& IndexedMap< Value, Index, Key >::getElement( KeyType key )
 {
    return map[ key ];
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-void IndexedMap< Element, Index, Key >::print( std::ostream& str ) const
+void IndexedMap< Value, Index, Key >::print( std::ostream& str ) const
 {
    STDMapIteratorType iter = map.begin();
    str << iter->second.data;
@@ -98,10 +100,10 @@ void IndexedMap< Element, Index, Key >::print( std::ostream& str ) const
    }
 }
 
-template< typename Element,
+template< typename Value,
           typename Index,
           typename Key >
-std::ostream& operator<<( std::ostream& str, IndexedMap< Element, Index, Key >& set )
+std::ostream& operator<<( std::ostream& str, IndexedMap< Value, Index, Key >& set )
 {
    set.print( str );
    return str;
diff --git a/src/TNL/Containers/List.h b/src/TNL/Containers/List.h
index 0ec42106de2b49f5296e8a0aede1328715eea3f9..c64a8a957b3d7106a8601a7033c169052f599362 100644
--- a/src/TNL/Containers/List.h
+++ b/src/TNL/Containers/List.h
@@ -41,7 +41,7 @@ template< class T > class ListDataElement;
 template< class T > class List
 {
    public:
-      typedef T ElementType;
+      typedef T ValueType;
 
       //! Basic constructor
       List();
diff --git a/src/TNL/Containers/MultiArray.h b/src/TNL/Containers/MultiArray.h
index 891efda8fd554556f2b6c083105565a2425d6fa2..cdcda634bcf768759c44f486790a3511445392d4 100644
--- a/src/TNL/Containers/MultiArray.h
+++ b/src/TNL/Containers/MultiArray.h
@@ -18,21 +18,21 @@
 namespace TNL {
 namespace Containers {   
 
-template< int Dimension, typename Element = double, typename Device = Devices::Host, typename Index = int >
-class MultiArray : public Array< Element, Device, Index >
+template< int Dimension, typename Value = double, typename Device = Devices::Host, typename Index = int >
+class MultiArray : public Array< Value, Device, Index >
 {
 };
 
-template< typename Element, typename Device, typename Index >
-class MultiArray< 1, Element, Device, Index > : public Array< Element, Device, Index >
+template< typename Value, typename Device, typename Index >
+class MultiArray< 1, Value, Device, Index > : public Array< Value, Device, Index >
 {
    public:
    enum { Dimension = 1};
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef MultiArray< 1, Element, Devices::Host, Index > HostType;
-   typedef MultiArray< 1, Element, Devices::Cuda, Index > CudaType;
+   typedef MultiArray< 1, Value, Devices::Host, Index > HostType;
+   typedef MultiArray< 1, Value, Devices::Cuda, Index > CudaType;
 
 
    MultiArray();
@@ -61,19 +61,19 @@ class MultiArray< 1, Element, Device, Index > : public Array< Element, Device, I
 
    __cuda_callable__ Index getElementIndex( const Index i ) const;
 
-   void setElement( const Index i, Element value );
+   void setElement( const Index i, Value value );
 
    //! This method can be used for general access to the elements of the arrays.
    /*! It does not return reference but value. So it can be used to access
     *  arrays in different address space (usually GPU device).
     *  See also operator().
     */
-   Element getElement( const Index i ) const;
+   Value getElement( const Index i ) const;
 
    //! Operator for accessing elements of the array.
-   __cuda_callable__ Element& operator()( const Index i );
+   __cuda_callable__ Value& operator()( const Index i );
 
-   __cuda_callable__ const Element& operator()( const Index i ) const;
+   __cuda_callable__ const Value& operator()( const Index i ) const;
 
 
    template< typename MultiArrayT >
@@ -82,10 +82,10 @@ class MultiArray< 1, Element, Device, Index > : public Array< Element, Device, I
    template< typename MultiArrayT >
    bool operator != ( const MultiArrayT& array ) const;
 
-   MultiArray< 1, Element, Device, Index >& operator = ( const MultiArray< 1, Element, Device, Index >& array );
+   MultiArray< 1, Value, Device, Index >& operator = ( const MultiArray< 1, Value, Device, Index >& array );
 
    template< typename MultiArrayT >
-   MultiArray< 1, Element, Device, Index >& operator = ( const MultiArrayT& array );
+   MultiArray< 1, Value, Device, Index >& operator = ( const MultiArrayT& array );
 
    //! Method for saving the object to a file as a binary data
    bool save( File& file ) const;
@@ -102,16 +102,16 @@ class MultiArray< 1, Element, Device, Index > : public Array< Element, Device, I
    Containers::StaticVector< 1, Index > dimensions;
 };
 
-template< typename Element, typename Device, typename Index >
-class MultiArray< 2, Element, Device, Index > : public Array< Element, Device, Index >
+template< typename Value, typename Device, typename Index >
+class MultiArray< 2, Value, Device, Index > : public Array< Value, Device, Index >
 {
    public:
    enum { Dimension = 2 };
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef MultiArray< 2, Element, Devices::Host, Index > HostType;
-   typedef MultiArray< 2, Element, Devices::Cuda, Index > CudaType;
+   typedef MultiArray< 2, Value, Devices::Host, Index > HostType;
+   typedef MultiArray< 2, Value, Devices::Cuda, Index > CudaType;
 
 
    MultiArray();
@@ -140,23 +140,23 @@ class MultiArray< 2, Element, Device, Index > : public Array< Element, Device, I
 
    __cuda_callable__ Index getElementIndex( const Index j, const Index i ) const;
 
-   void setElement( const Index j, const Index i, Element value );
+   void setElement( const Index j, const Index i, Value value );
 
    //! This method can be used for general access to the elements of the arrays.
    /*! It does not return reference but value. So it can be used to access
     *  arrays in different adress space (usualy GPU device).
     *  See also operator().
     */
-   Element getElement( const Index j, const Index i ) const;
+   Value getElement( const Index j, const Index i ) const;
 
    //! Operator for accessing elements of the array.
    /*! It returns reference to given elements so it cannot be
     *  used to access elements of arrays in different address space
     *  (GPU device usually).
     */
-   __cuda_callable__ Element& operator()( const Index j, const Index i );
+   __cuda_callable__ Value& operator()( const Index j, const Index i );
 
-   __cuda_callable__ const Element& operator()( const Index j, const Index i ) const;
+   __cuda_callable__ const Value& operator()( const Index j, const Index i ) const;
 
    template< typename MultiArrayT >
    bool operator == ( const MultiArrayT& array ) const;
@@ -164,10 +164,10 @@ class MultiArray< 2, Element, Device, Index > : public Array< Element, Device, I
    template< typename MultiArrayT >
    bool operator != ( const MultiArrayT& array ) const;
 
-   MultiArray< 2, Element, Device, Index >& operator = ( const MultiArray< 2, Element, Device, Index >& array );
+   MultiArray< 2, Value, Device, Index >& operator = ( const MultiArray< 2, Value, Device, Index >& array );
 
    template< typename MultiArrayT >
-   MultiArray< 2, Element, Device, Index >& operator = ( const MultiArrayT& array );
+   MultiArray< 2, Value, Device, Index >& operator = ( const MultiArrayT& array );
 
    //! Method for saving the object to a file as a binary data
    bool save( File& file ) const;
@@ -184,17 +184,17 @@ class MultiArray< 2, Element, Device, Index > : public Array< Element, Device, I
    Containers::StaticVector< 2, Index > dimensions;
 };
 
-template< typename Element, typename Device, typename Index >
-class MultiArray< 3, Element, Device, Index > : public Array< Element, Device, Index >
+template< typename Value, typename Device, typename Index >
+class MultiArray< 3, Value, Device, Index > : public Array< Value, Device, Index >
 {
    public:
 
    enum { Dimension = 3 };
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef MultiArray< 3, Element, Devices::Host, Index > HostType;
-   typedef MultiArray< 3, Element, Devices::Cuda, Index > CudaType;
+   typedef MultiArray< 3, Value, Devices::Host, Index > HostType;
+   typedef MultiArray< 3, Value, Devices::Cuda, Index > CudaType;
 
 
    MultiArray();
@@ -223,23 +223,23 @@ class MultiArray< 3, Element, Device, Index > : public Array< Element, Device, I
 
    __cuda_callable__ Index getElementIndex( const Index k, const Index j, const Index i ) const;
 
-   void setElement( const Index k, const Index j, const Index i, Element value );
+   void setElement( const Index k, const Index j, const Index i, Value value );
 
    //! This method can be used for general access to the elements of the arrays.
    /*! It does not return reference but value. So it can be used to access
     *  arrays in different adress space (usualy GPU device).
     *  See also operator().
     */
-   Element getElement( const Index k, const Index j, const Index i ) const;
+   Value getElement( const Index k, const Index j, const Index i ) const;
 
    //! Operator for accessing elements of the array.
    /*! It returns reference to given elements so it cannot be
     *  used to access elements of arrays in different adress space
     *  (GPU device usualy).
     */
-   __cuda_callable__ Element& operator()( const Index k, const Index j, const Index i );
+   __cuda_callable__ Value& operator()( const Index k, const Index j, const Index i );
 
-   __cuda_callable__ const Element& operator()( const Index k, const Index j, const Index i ) const;
+   __cuda_callable__ const Value& operator()( const Index k, const Index j, const Index i ) const;
 
    template< typename MultiArrayT >
    bool operator == ( const MultiArrayT& array ) const;
@@ -247,10 +247,10 @@ class MultiArray< 3, Element, Device, Index > : public Array< Element, Device, I
    template< typename MultiArrayT >
    bool operator != ( const MultiArrayT& array ) const;
 
-   MultiArray< 3, Element, Device, Index >& operator = ( const MultiArray< 3, Element, Device, Index >& array );
+   MultiArray< 3, Value, Device, Index >& operator = ( const MultiArray< 3, Value, Device, Index >& array );
 
    template< typename MultiArrayT >
-   MultiArray< 3, Element, Device, Index >& operator = ( const MultiArrayT& array );
+   MultiArray< 3, Value, Device, Index >& operator = ( const MultiArrayT& array );
 
    //! Method for saving the object to a file as a binary data
    bool save( File& file ) const;
@@ -267,17 +267,17 @@ class MultiArray< 3, Element, Device, Index > : public Array< Element, Device, I
    Containers::StaticVector< 3, Index > dimensions;
 };
 
-template< typename Element, typename Device, typename Index >
-class MultiArray< 4, Element, Device, Index > : public Array< Element, Device, Index >
+template< typename Value, typename Device, typename Index >
+class MultiArray< 4, Value, Device, Index > : public Array< Value, Device, Index >
 {
    public:
 
    enum { Dimension = 4 };
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef MultiArray< 4, Element, Devices::Host, Index > HostType;
-   typedef MultiArray< 4, Element, Devices::Cuda, Index > CudaType;
+   typedef MultiArray< 4, Value, Devices::Host, Index > HostType;
+   typedef MultiArray< 4, Value, Devices::Cuda, Index > CudaType;
 
 
    MultiArray();
@@ -306,23 +306,23 @@ class MultiArray< 4, Element, Device, Index > : public Array< Element, Device, I
 
    __cuda_callable__ Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const;
 
-   void setElement( const Index l, const Index k, const Index j, const Index i, Element value );
+   void setElement( const Index l, const Index k, const Index j, const Index i, Value value );
 
    //! This method can be used for general access to the elements of the arrays.
    /*! It does not return reference but value. So it can be used to access
     *  arrays in different adress space (usualy GPU device).
     *  See also operator().
     */
-   Element getElement( const Index l, const Index k, const Index j, const Index i ) const;
+   Value getElement( const Index l, const Index k, const Index j, const Index i ) const;
 
    //! Operator for accessing elements of the array.
    /*! It returns reference to given elements so it cannot be
     *  used to access elements of arrays in different adress space
     *  (GPU device usualy).
     */
-   __cuda_callable__ Element& operator()( const Index l, const Index k, const Index j, const Index i );
+   __cuda_callable__ Value& operator()( const Index l, const Index k, const Index j, const Index i );
 
-   __cuda_callable__ const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const;
+   __cuda_callable__ const Value& operator()( const Index l, const Index k, const Index j, const Index i ) const;
 
    template< typename MultiArrayT >
    bool operator == ( const MultiArrayT& array ) const;
@@ -330,10 +330,10 @@ class MultiArray< 4, Element, Device, Index > : public Array< Element, Device, I
    template< typename MultiArrayT >
    bool operator != ( const MultiArrayT& array ) const;
 
-   MultiArray< 4, Element, Device, Index >& operator = ( const MultiArray< 4, Element, Device, Index >& array );
+   MultiArray< 4, Value, Device, Index >& operator = ( const MultiArray< 4, Value, Device, Index >& array );
 
    template< typename MultiArrayT >
-   MultiArray< 4, Element, Device, Index >& operator = ( const MultiArrayT& array );
+   MultiArray< 4, Value, Device, Index >& operator = ( const MultiArrayT& array );
 
    //! Method for saving the object to a file as a binary data
    bool save( File& file ) const;
@@ -350,17 +350,17 @@ class MultiArray< 4, Element, Device, Index > : public Array< Element, Device, I
    Containers::StaticVector< 4, Index > dimensions;
 };
 
-template< typename Element, typename device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 1, Element, device, Index >& array );
+template< typename Value, typename device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 1, Value, device, Index >& array );
 
-template< typename Element, typename device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 2, Element, device, Index >& array );
+template< typename Value, typename device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 2, Value, device, Index >& array );
 
-template< typename Element, typename device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 3, Element, device, Index >& array );
+template< typename Value, typename device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 3, Value, device, Index >& array );
 
-template< typename Element, typename device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 4, Element, device, Index >& array );
+template< typename Value, typename device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 4, Value, device, Index >& array );
 
 } // namespace Containers
 } // namespace TNL
@@ -369,92 +369,3 @@ std::ostream& operator << ( std::ostream& str, const MultiArray< 4, Element, dev
 #include <TNL/Containers/MultiArray2D_impl.h>
 #include <TNL/Containers/MultiArray3D_impl.h>
 #include <TNL/Containers/MultiArray4D_impl.h>
-
-namespace TNL {
-namespace Containers {
-   
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 1, float,  Devices::Host, int >;
-#endif
-extern template class MultiArray< 1, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 1, float,  Devices::Host, long int >;
-#endif
-extern template class MultiArray< 1, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 2, float,  Devices::Host, int >;
-#endif
-extern template class MultiArray< 2, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 2, float,  Devices::Host, long int >;
-#endif
-extern template class MultiArray< 2, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 3, float,  Devices::Host, int >;
-#endif
-extern template class MultiArray< 3, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 3, float,  Devices::Host, long int >;
-#endif
-extern template class MultiArray< 3, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 4, float,  Devices::Host, int >;
-#endif
-extern template class MultiArray< 4, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 4, float,  Devices::Host, long int >;
-#endif
-extern template class MultiArray< 4, double, Devices::Host, long int >;
-#endif
-
-// TODO: There are problems with nvlink - it might be better in later versions
-/*
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 1, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiArray< 1, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 1, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiArray< 1, double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 2, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiArray< 2, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 2, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiArray< 2, double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 3, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiArray< 3, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 3, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiArray< 3, double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 4, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiArray< 4, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiArray< 4, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiArray< 4, double, Devices::Cuda, long int >;*/
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/MultiArray1D_impl.h b/src/TNL/Containers/MultiArray1D_impl.h
index d380aa831fdba0b302dee9743de4dbb7574d3d4c..6c8f0b29ceefb37ea4bb237d0a920295603125c0 100644
--- a/src/TNL/Containers/MultiArray1D_impl.h
+++ b/src/TNL/Containers/MultiArray1D_impl.h
@@ -13,18 +13,18 @@
 namespace TNL {
 namespace Containers {   
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 1, Element, Device, Index > :: MultiArray()
+template< typename Value, typename Device, typename Index >
+MultiArray< 1, Value, Device, Index > :: MultiArray()
 {
 }
 
-template< typename Element, typename Device, typename Index >
-String MultiArray< 1, Element, Device, Index > :: getType()
+template< typename Value, typename Device, typename Index >
+String MultiArray< 1, Value, Device, Index > :: getType()
 {
    return String( "Containers::MultiArray< ") +
           String( Dimension ) +
           String( ", " ) +
-          String( TNL::getType< Element >() ) +
+          String( TNL::getType< Value >() ) +
           String( ", " ) +
           String( Device :: getDeviceType() ) +
           String( ", " ) +
@@ -32,161 +32,161 @@ String MultiArray< 1, Element, Device, Index > :: getType()
           String( " >" );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 1, Element, Device, Index > :: getTypeVirtual() const
+String MultiArray< 1, Value, Device, Index > :: getTypeVirtual() const
 {
    return this->getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 1, Element, Device, Index > :: getSerializationType()
+String MultiArray< 1, Value, Device, Index > :: getSerializationType()
 {
    return HostType::getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 1, Element, Device, Index > :: getSerializationTypeVirtual() const
+String MultiArray< 1, Value, Device, Index > :: getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 };
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 1, Element, Device, Index > :: setDimensions( const Index iSize )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 1, Value, Device, Index > :: setDimensions( const Index iSize )
 {
    TNL_ASSERT( iSize > 0,
               std::cerr << "iSize = " << iSize );
    dimensions[ 0 ] = iSize;
-   Array< Element, Device, Index >::setSize( iSize );
+   Array< Value, Device, Index >::setSize( iSize );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 1, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 1, Index >& dimensions )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 1, Value, Device, Index > :: setDimensions( const Containers::StaticVector< 1, Index >& dimensions )
 {
    TNL_ASSERT( dimensions[ 0 ] > 0,
               std::cerr << " dimensions[ 0 ] = " << dimensions[ 0 ] );
    this->dimensions = dimensions;
-   Array< Element, Device, Index >::setSize( this->dimensions[ 0 ] );
+   Array< Value, Device, Index >::setSize( this->dimensions[ 0 ] );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-void MultiArray< 1, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 1, Value, Device, Index > :: setLike( const MultiArrayT& multiArray )
 {
    setDimensions( multiArray. getDimensions() );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 1, Element, Device, Index >::reset()
+template< typename Value, typename Device, typename Index >
+void MultiArray< 1, Value, Device, Index >::reset()
 {
    this->dimensions = Containers::StaticVector< 1, Index >( ( Index ) 0 );
-   Array< Element, Device, Index >::reset();
+   Array< Value, Device, Index >::reset();
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-void MultiArray< 1, Element, Device, Index > :: getDimensions( Index& xSize ) const
+void MultiArray< 1, Value, Device, Index > :: getDimensions( Index& xSize ) const
 {
    xSize = this->dimensions[ 0 ];
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Containers::StaticVector< 1, Index >& MultiArray< 1, Element, Device, Index > :: getDimensions() const
+const Containers::StaticVector< 1, Index >& MultiArray< 1, Value, Device, Index > :: getDimensions() const
 {
    return this->dimensions;
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Index MultiArray< 1, Element, Device, Index > :: getElementIndex( const Index i ) const
+Index MultiArray< 1, Value, Device, Index > :: getElementIndex( const Index i ) const
 {
    TNL_ASSERT( i >= 0 && i < this->dimensions[ 0 ],
               std::cerr << "i = " << i << " this->dimensions[ 0 ] = " <<  this->dimensions[ 0 ] );
    return i;
 }
 
-template< typename Element, typename Device, typename Index >
-Element MultiArray< 1, Element, Device, Index > :: getElement( const Index i ) const
+template< typename Value, typename Device, typename Index >
+Value MultiArray< 1, Value, Device, Index > :: getElement( const Index i ) const
 {
-   return Array< Element, Device, Index > :: getElement( getElementIndex( i ) );
+   return Array< Value, Device, Index > :: getElement( getElementIndex( i ) );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 1, Element, Device, Index > :: setElement( const Index i, Element value )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 1, Value, Device, Index > :: setElement( const Index i, Value value )
 {
-   Array< Element, Device, Index > :: setElement( getElementIndex( i ), value );
+   Array< Value, Device, Index > :: setElement( getElementIndex( i ), value );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Element& MultiArray< 1, Element, Device, Index > :: operator()( const Index element )
+Value& MultiArray< 1, Value, Device, Index > :: operator()( const Index element )
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( element ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( element ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Element& MultiArray< 1, Element, Device, Index > :: operator()( const Index element ) const
+const Value& MultiArray< 1, Value, Device, Index > :: operator()( const Index element ) const
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( element ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( element ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 1, Element, Device, Index > :: operator == ( const MultiArrayT& array ) const
+bool MultiArray< 1, Value, Device, Index > :: operator == ( const MultiArrayT& array ) const
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to compare two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   return Array< Element, Device, Index > :: operator == ( array );
+   return Array< Value, Device, Index > :: operator == ( array );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 1, Element, Device, Index > :: operator != ( const MultiArrayT& array ) const
+bool MultiArray< 1, Value, Device, Index > :: operator != ( const MultiArrayT& array ) const
 {
    return ! ( (* this ) == array );
 }
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 1, Element, Device, Index >&
-   MultiArray< 1, Element, Device, Index > :: operator = ( const MultiArray< 1, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+MultiArray< 1, Value, Device, Index >&
+   MultiArray< 1, Value, Device, Index > :: operator = ( const MultiArray< 1, Value, Device, Index >& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-MultiArray< 1, Element, Device, Index >&
-   MultiArray< 1, Element, Device, Index > :: operator = ( const MultiArrayT& array )
+MultiArray< 1, Value, Device, Index >&
+   MultiArray< 1, Value, Device, Index > :: operator = ( const MultiArrayT& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 1, Element, Device, Index > :: save( File& file ) const
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 1, Value, Device, Index > :: save( File& file ) const
 {
-   if( ! Array< Element, Device, Index > :: save( file ) )
+   if( ! Array< Value, Device, Index > :: save( file ) )
    {
       std::cerr << "I was not able to write the Array of MultiArray." << std::endl;
       return false;
@@ -199,10 +199,10 @@ bool MultiArray< 1, Element, Device, Index > :: save( File& file ) const
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 1, Element, Device, Index > :: load( File& file )
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 1, Value, Device, Index > :: load( File& file )
 {
-   if( ! Array< Element, Device, Index > :: load( file ) )
+   if( ! Array< Value, Device, Index > :: load( file ) )
    {
       std::cerr << "I was not able to read the Array of MultiArray." << std::endl;
       return false;
@@ -215,20 +215,20 @@ bool MultiArray< 1, Element, Device, Index > :: load( File& file )
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 1, Element, Device, Index > :: save( const String& fileName ) const
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 1, Value, Device, Index > :: save( const String& fileName ) const
 {
    return Object :: save( fileName );
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 1, Element, Device, Index > :: load( const String& fileName )
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 1, Value, Device, Index > :: load( const String& fileName )
 {
    return Object :: load( fileName );
 }
 
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 1, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 1, Value, Device, Index >& array )
 {
    for( Index i = 0; i < array. getDimensions()[ 0 ]; i ++ )
    {
diff --git a/src/TNL/Containers/MultiArray2D_impl.h b/src/TNL/Containers/MultiArray2D_impl.h
index 3812429c8260d75874b74b249876a5d9e8e56cf1..44d860167968df549a63ed089cad38f9c0919881 100644
--- a/src/TNL/Containers/MultiArray2D_impl.h
+++ b/src/TNL/Containers/MultiArray2D_impl.h
@@ -13,18 +13,18 @@
 namespace TNL {
 namespace Containers {   
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 2, Element, Device, Index > :: MultiArray()
+template< typename Value, typename Device, typename Index >
+MultiArray< 2, Value, Device, Index > :: MultiArray()
 {
 }
 
-template< typename Element, typename Device, typename Index >
-String MultiArray< 2, Element, Device, Index > :: getType()
+template< typename Value, typename Device, typename Index >
+String MultiArray< 2, Value, Device, Index > :: getType()
 {
    return String( "Containers::MultiArray< ") +
           String( Dimension ) +
           String( ", " ) +
-          String( TNL::getType< Element >() ) +
+          String( TNL::getType< Value >() ) +
           String( ", " ) +
           String( Device :: getDeviceType() ) +
           String( ", " ) +
@@ -32,32 +32,32 @@ String MultiArray< 2, Element, Device, Index > :: getType()
           String( " >" );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 2, Element, Device, Index > :: getTypeVirtual() const
+String MultiArray< 2, Value, Device, Index > :: getTypeVirtual() const
 {
    return this->getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 2, Element, Device, Index > :: getSerializationType()
+String MultiArray< 2, Value, Device, Index > :: getSerializationType()
 {
    return HostType::getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 2, Element, Device, Index > :: getSerializationTypeVirtual() const
+String MultiArray< 2, Value, Device, Index > :: getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 };
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 2, Element, Device, Index > :: setDimensions( const Index jSize,
+template< typename Value, typename Device, typename Index >
+void MultiArray< 2, Value, Device, Index > :: setDimensions( const Index jSize,
                                                                   const Index iSize )
 {
    TNL_ASSERT( iSize > 0 && jSize > 0,
@@ -66,11 +66,11 @@ void MultiArray< 2, Element, Device, Index > :: setDimensions( const Index jSize
 
    dimensions[ 0 ] = iSize;
    dimensions[ 1 ] = jSize;
-   Array< Element, Device, Index > :: setSize( iSize * jSize );
+   Array< Value, Device, Index > :: setSize( iSize * jSize );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 2, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 2, Index >& dimensions )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 2, Value, Device, Index > :: setDimensions( const Containers::StaticVector< 2, Index >& dimensions )
 {
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0,
               std::cerr << "dimensions = " << dimensions );
@@ -79,41 +79,41 @@ void MultiArray< 2, Element, Device, Index > :: setDimensions( const Containers:
     */
    this->dimensions. x() = dimensions. y();
    this->dimensions. y() = dimensions. x();
-   Array< Element, Device, Index > :: setSize( this->dimensions[ 1 ] * this->dimensions[ 0 ] );
+   Array< Value, Device, Index > :: setSize( this->dimensions[ 1 ] * this->dimensions[ 0 ] );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-void MultiArray< 2, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 2, Value, Device, Index > :: setLike( const MultiArrayT& multiArray )
 {
    setDimensions( multiArray. getDimensions() );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 2, Element, Device, Index >::reset()
+template< typename Value, typename Device, typename Index >
+void MultiArray< 2, Value, Device, Index >::reset()
 {
    this->dimensions = Containers::StaticVector< 2, Index >( ( Index ) 0 );
-   Array< Element, Device, Index >::reset();
+   Array< Value, Device, Index >::reset();
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-void MultiArray< 2, Element, Device, Index > :: getDimensions( Index& jSize, Index& iSize ) const
+void MultiArray< 2, Value, Device, Index > :: getDimensions( Index& jSize, Index& iSize ) const
 {
    iSize = this->dimensions[ 0 ];
    jSize = this->dimensions[ 1 ];
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Containers::StaticVector< 2, Index >& MultiArray< 2, Element, Device, Index > :: getDimensions() const
+const Containers::StaticVector< 2, Index >& MultiArray< 2, Value, Device, Index > :: getDimensions() const
 {
    return this->dimensions;
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Index MultiArray< 2, Element, Device, Index > :: getElementIndex( const Index j, const Index i ) const
+Index MultiArray< 2, Value, Device, Index > :: getElementIndex( const Index j, const Index i ) const
 {
    TNL_ASSERT( i >= 0 && i < this->dimensions[ 0 ] && j >= 0 && j < this->dimensions[ 1 ],
               std::cerr << "i = " << i << " j = " << j << " this->dimensions[ 0 ] = " <<  this->dimensions[ 0 ]
@@ -121,82 +121,82 @@ Index MultiArray< 2, Element, Device, Index > :: getElementIndex( const Index j,
    return j * this->dimensions[ 0 ] + i;
 }
 
-template< typename Element, typename Device, typename Index >
-Element MultiArray< 2, Element, Device, Index > :: getElement( const Index j, const Index i ) const
+template< typename Value, typename Device, typename Index >
+Value MultiArray< 2, Value, Device, Index > :: getElement( const Index j, const Index i ) const
 {
-   return Array< Element, Device, Index > :: getElement( getElementIndex( j, i ) );
+   return Array< Value, Device, Index > :: getElement( getElementIndex( j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 2, Element, Device, Index > :: setElement( const Index j, const Index i, Element value )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 2, Value, Device, Index > :: setElement( const Index j, const Index i, Value value )
 {
-   Array< Element, Device, Index > :: setElement( getElementIndex( j, i ), value );
+   Array< Value, Device, Index > :: setElement( getElementIndex( j, i ), value );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Element& MultiArray< 2, Element, Device, Index > :: operator()( const Index j, const Index i )
+Value& MultiArray< 2, Value, Device, Index > :: operator()( const Index j, const Index i )
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( j, i ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Element& MultiArray< 2, Element, Device, Index > :: operator()( const Index j, const Index i ) const
+const Value& MultiArray< 2, Value, Device, Index > :: operator()( const Index j, const Index i ) const
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( j, i ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 2, Element, Device, Index > :: operator == ( const MultiArrayT& array ) const
+bool MultiArray< 2, Value, Device, Index > :: operator == ( const MultiArrayT& array ) const
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to compare two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   return Array< Element, Device, Index > :: operator == ( array );
+   return Array< Value, Device, Index > :: operator == ( array );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 2, Element, Device, Index > :: operator != ( const MultiArrayT& array ) const
+bool MultiArray< 2, Value, Device, Index > :: operator != ( const MultiArrayT& array ) const
 {
    return ! ( (* this ) == array );
 }
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 2, Element, Device, Index >&
-   MultiArray< 2, Element, Device, Index > :: operator = ( const MultiArray< 2, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+MultiArray< 2, Value, Device, Index >&
+   MultiArray< 2, Value, Device, Index > :: operator = ( const MultiArray< 2, Value, Device, Index >& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-MultiArray< 2, Element, Device, Index >&
-   MultiArray< 2, Element, Device, Index > :: operator = ( const MultiArrayT& array )
+MultiArray< 2, Value, Device, Index >&
+   MultiArray< 2, Value, Device, Index > :: operator = ( const MultiArrayT& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 2, Element, Device, Index > :: save( File& file ) const
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 2, Value, Device, Index > :: save( File& file ) const
 {
-   if( ! Array< Element, Device, Index > :: save( file ) )
+   if( ! Array< Value, Device, Index > :: save( file ) )
    {
       std::cerr << "I was not able to write the Array of MultiArray." << std::endl;
       return false;
@@ -209,10 +209,10 @@ bool MultiArray< 2, Element, Device, Index > :: save( File& file ) const
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 2, Element, Device, Index > :: load( File& file )
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 2, Value, Device, Index > :: load( File& file )
 {
-   if( ! Array< Element, Device, Index > :: load( file ) )
+   if( ! Array< Value, Device, Index > :: load( file ) )
    {
       std::cerr << "I was not able to read the Array of MultiArray." << std::endl;
       return false;
@@ -225,20 +225,20 @@ bool MultiArray< 2, Element, Device, Index > :: load( File& file )
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 2, Element, Device, Index > :: save( const String& fileName ) const
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 2, Value, Device, Index > :: save( const String& fileName ) const
 {
    return Object :: save( fileName );
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 2, Element, Device, Index > :: load( const String& fileName )
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 2, Value, Device, Index > :: load( const String& fileName )
 {
    return Object :: load( fileName );
 }
 
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 2, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 2, Value, Device, Index >& array )
 {
    for( Index j = 0; j < array. getDimensions()[ 1 ]; j ++ )
    {
diff --git a/src/TNL/Containers/MultiArray3D_impl.h b/src/TNL/Containers/MultiArray3D_impl.h
index 6e9fb9d9f2697d997ab32522257c2b1be23185bd..9dc3c031795b2e4c5b68fa093512f81911d1abc2 100644
--- a/src/TNL/Containers/MultiArray3D_impl.h
+++ b/src/TNL/Containers/MultiArray3D_impl.h
@@ -13,18 +13,18 @@
 namespace TNL {
 namespace Containers {   
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 3, Element, Device, Index > :: MultiArray()
+template< typename Value, typename Device, typename Index >
+MultiArray< 3, Value, Device, Index > :: MultiArray()
 {
 }
 
-template< typename Element, typename Device, typename Index >
-String MultiArray< 3, Element, Device, Index > :: getType()
+template< typename Value, typename Device, typename Index >
+String MultiArray< 3, Value, Device, Index > :: getType()
 {
    return String( "Containers::MultiArray< ") +
           String( Dimension ) +
           String( ", " ) +
-          String( TNL::getType< Element >() ) +
+          String( TNL::getType< Value >() ) +
           String( ", " ) +
           String( Device :: getDeviceType() ) +
           String( ", " ) +
@@ -32,32 +32,32 @@ String MultiArray< 3, Element, Device, Index > :: getType()
           String( " >" );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 3, Element, Device, Index > :: getTypeVirtual() const
+String MultiArray< 3, Value, Device, Index > :: getTypeVirtual() const
 {
    return this->getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 3, Element, Device, Index > :: getSerializationType()
+String MultiArray< 3, Value, Device, Index > :: getSerializationType()
 {
    return HostType::getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 3, Element, Device, Index > :: getSerializationTypeVirtual() const
+String MultiArray< 3, Value, Device, Index > :: getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 };
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 3, Element, Device, Index > :: setDimensions( const Index kSize,
+template< typename Value, typename Device, typename Index >
+void MultiArray< 3, Value, Device, Index > :: setDimensions( const Index kSize,
                                                                        const Index jSize,
                                                                        const Index iSize )
 {
@@ -69,11 +69,11 @@ void MultiArray< 3, Element, Device, Index > :: setDimensions( const Index kSize
    dimensions[ 0 ] = iSize;
    dimensions[ 1 ] = jSize;
    dimensions[ 2 ] = kSize;
-   Array< Element, Device, Index > :: setSize( iSize * jSize * kSize );
+   Array< Value, Device, Index > :: setSize( iSize * jSize * kSize );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 3, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 3, Index >& dimensions )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 3, Value, Device, Index > :: setDimensions( const Containers::StaticVector< 3, Index >& dimensions )
 {
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0 && dimensions[ 2 ],
               std::cerr << "dimensions = " << dimensions );
@@ -83,28 +83,28 @@ void MultiArray< 3, Element, Device, Index > :: setDimensions( const Containers:
    this->dimensions. x() = dimensions. z();
    this->dimensions. y() = dimensions. y();
    this->dimensions. z() = dimensions. x();
-   Array< Element, Device, Index > :: setSize( this->dimensions[ 2 ] *
+   Array< Value, Device, Index > :: setSize( this->dimensions[ 2 ] *
                                                this->dimensions[ 1 ] *
                                                this->dimensions[ 0 ] );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-void MultiArray< 3, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 3, Value, Device, Index > :: setLike( const MultiArrayT& multiArray )
 {
    setDimensions( multiArray. getDimensions() );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 3, Element, Device, Index >::reset()
+template< typename Value, typename Device, typename Index >
+void MultiArray< 3, Value, Device, Index >::reset()
 {
    this->dimensions = Containers::StaticVector< 3, Index >( ( Index ) 0 );
-   Array< Element, Device, Index >::reset();
+   Array< Value, Device, Index >::reset();
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-void MultiArray< 3, Element, Device, Index > :: getDimensions( Index& kSize,
+void MultiArray< 3, Value, Device, Index > :: getDimensions( Index& kSize,
                                                                   Index& jSize,
                                                                   Index& iSize ) const
 {
@@ -113,16 +113,16 @@ void MultiArray< 3, Element, Device, Index > :: getDimensions( Index& kSize,
    kSize = this->dimensions[ 2 ];
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Containers::StaticVector< 3, Index >& MultiArray< 3, Element, Device, Index > :: getDimensions() const
+const Containers::StaticVector< 3, Index >& MultiArray< 3, Value, Device, Index > :: getDimensions() const
 {
    return this->dimensions;
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Index MultiArray< 3, Element, Device, Index > :: getElementIndex( const Index k,
+Index MultiArray< 3, Value, Device, Index > :: getElementIndex( const Index k,
                                                                      const Index j,
                                                                      const Index i ) const
 {
@@ -136,91 +136,91 @@ Index MultiArray< 3, Element, Device, Index > :: getElementIndex( const Index k,
    return ( k * this->dimensions[ 1 ]  + j ) * this->dimensions[ 0 ] + i;
 }
 
-template< typename Element, typename Device, typename Index >
-Element MultiArray< 3, Element, Device, Index > :: getElement( const Index k,
+template< typename Value, typename Device, typename Index >
+Value MultiArray< 3, Value, Device, Index > :: getElement( const Index k,
                                                                   const Index j,
                                                                   const Index i ) const
 {
-   return Array< Element, Device, Index > :: getElement( getElementIndex( k, j, i ) );
+   return Array< Value, Device, Index > :: getElement( getElementIndex( k, j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 3, Element, Device, Index > :: setElement( const Index k,
+template< typename Value, typename Device, typename Index >
+void MultiArray< 3, Value, Device, Index > :: setElement( const Index k,
                                                                     const Index j,
-                                                                    const Index i, Element value )
+                                                                    const Index i, Value value )
 {
-   Array< Element, Device, Index > :: setElement( getElementIndex( k, j, i ), value );
+   Array< Value, Device, Index > :: setElement( getElementIndex( k, j, i ), value );
 }
 
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Element& MultiArray< 3, Element, Device, Index > :: operator()( const Index k,
+Value& MultiArray< 3, Value, Device, Index > :: operator()( const Index k,
                                                                         const Index j,
                                                                         const Index i )
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( k, j, i ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( k, j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Element& MultiArray< 3, Element, Device, Index > :: operator()( const Index k,
+const Value& MultiArray< 3, Value, Device, Index > :: operator()( const Index k,
                                                                                const Index j,
                                                                                const Index i ) const
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( k, j, i ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( k, j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 3, Element, Device, Index > :: operator == ( const MultiArrayT& array ) const
+bool MultiArray< 3, Value, Device, Index > :: operator == ( const MultiArrayT& array ) const
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to compare two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   return Array< Element, Device, Index > :: operator == ( array );
+   return Array< Value, Device, Index > :: operator == ( array );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 3, Element, Device, Index > :: operator != ( const MultiArrayT& array ) const
+bool MultiArray< 3, Value, Device, Index > :: operator != ( const MultiArrayT& array ) const
 {
    return ! ( (* this ) == array );
 }
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 3, Element, Device, Index >&
-   MultiArray< 3, Element, Device, Index > :: operator = ( const MultiArray< 3, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+MultiArray< 3, Value, Device, Index >&
+   MultiArray< 3, Value, Device, Index > :: operator = ( const MultiArray< 3, Value, Device, Index >& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-MultiArray< 3, Element, Device, Index >&
-   MultiArray< 3, Element, Device, Index > :: operator = ( const MultiArrayT& array )
+MultiArray< 3, Value, Device, Index >&
+   MultiArray< 3, Value, Device, Index > :: operator = ( const MultiArrayT& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 3, Element, Device, Index > :: save( File& file ) const
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 3, Value, Device, Index > :: save( File& file ) const
 {
-   if( ! Array< Element, Device, Index > :: save( file ) )
+   if( ! Array< Value, Device, Index > :: save( file ) )
    {
       std::cerr << "I was not able to write the Array of MultiArray." << std::endl;
       return false;
@@ -233,10 +233,10 @@ bool MultiArray< 3, Element, Device, Index > :: save( File& file ) const
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 3, Element, Device, Index > :: load( File& file )
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 3, Value, Device, Index > :: load( File& file )
 {
-   if( ! Array< Element, Device, Index > :: load( file ) )
+   if( ! Array< Value, Device, Index > :: load( file ) )
    {
       std::cerr << "I was not able to read the Array of MultiArray." << std::endl;
       return false;
@@ -249,8 +249,8 @@ bool MultiArray< 3, Element, Device, Index > :: load( File& file )
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 3, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 3, Value, Device, Index >& array )
 {
    for( Index k = 0; k < array. getDimensions()[ 2 ]; k ++ )
    {
diff --git a/src/TNL/Containers/MultiArray4D_impl.h b/src/TNL/Containers/MultiArray4D_impl.h
index ec034b3d21b6b8de53fa3326c0f240a5a59a8884..2b35c1caaf5180645ca1999f69b08a6458cfe4c3 100644
--- a/src/TNL/Containers/MultiArray4D_impl.h
+++ b/src/TNL/Containers/MultiArray4D_impl.h
@@ -14,18 +14,18 @@ namespace TNL {
 namespace Containers {   
    
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 4, Element, Device, Index > :: MultiArray()
+template< typename Value, typename Device, typename Index >
+MultiArray< 4, Value, Device, Index > :: MultiArray()
 {
 }
 
-template< typename Element, typename Device, typename Index >
-String MultiArray< 4, Element, Device, Index > :: getType()
+template< typename Value, typename Device, typename Index >
+String MultiArray< 4, Value, Device, Index > :: getType()
 {
    return String( "Containers::MultiArray< ") +
           String( Dimension ) +
           String( ", " ) +
-          String( TNL::getType< Element >() ) +
+          String( TNL::getType< Value >() ) +
           String( ", " ) +
           String( Device :: getDeviceType() ) +
           String( ", " ) +
@@ -33,32 +33,32 @@ String MultiArray< 4, Element, Device, Index > :: getType()
           String( " >" );
 }
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 4, Element, Device, Index > :: getTypeVirtual() const
+String MultiArray< 4, Value, Device, Index > :: getTypeVirtual() const
 {
    return this->getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 4, Element, Device, Index > :: getSerializationType()
+String MultiArray< 4, Value, Device, Index > :: getSerializationType()
 {
    return HostType::getType();
 };
 
-template< typename Element,
+template< typename Value,
           typename Device,
           typename Index >
-String MultiArray< 4, Element, Device, Index > :: getSerializationTypeVirtual() const
+String MultiArray< 4, Value, Device, Index > :: getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 };
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 4, Element, Device, Index > :: setDimensions( const Index lSize,
+template< typename Value, typename Device, typename Index >
+void MultiArray< 4, Value, Device, Index > :: setDimensions( const Index lSize,
                                                                        const Index kSize,
                                                                        const Index jSize,
                                                                        const Index iSize )
@@ -73,11 +73,11 @@ void MultiArray< 4, Element, Device, Index > :: setDimensions( const Index lSize
    dimensions[ 1 ] = jSize;
    dimensions[ 2 ] = kSize;
    dimensions[ 3 ] = lSize;
-   Array< Element, Device, Index > :: setSize( iSize * jSize * kSize * lSize );
+   Array< Value, Device, Index > :: setSize( iSize * jSize * kSize * lSize );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 4, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 4, Index >& dimensions )
+template< typename Value, typename Device, typename Index >
+void MultiArray< 4, Value, Device, Index > :: setDimensions( const Containers::StaticVector< 4, Index >& dimensions )
 {
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0 && dimensions[ 2 ] && dimensions[ 3 ] > 0,
               std::cerr << "dimensions = " << dimensions );
@@ -88,29 +88,29 @@ void MultiArray< 4, Element, Device, Index > :: setDimensions( const Containers:
    this->dimensions[ 1 ] = dimensions[ 2 ];
    this->dimensions[ 2 ] = dimensions[ 1 ];
    this->dimensions[ 3 ] = dimensions[ 0 ];
-   Array< Element, Device, Index > :: setSize( this->dimensions[ 3 ] *
+   Array< Value, Device, Index > :: setSize( this->dimensions[ 3 ] *
                                                this->dimensions[ 2 ] *
                                                this->dimensions[ 1 ] *
                                                this->dimensions[ 0 ] );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-void MultiArray< 4, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 4, Value, Device, Index > :: setLike( const MultiArrayT& multiArray )
 {
    setDimensions( multiArray. getDimensions() );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 4, Element, Device, Index >::reset()
+template< typename Value, typename Device, typename Index >
+void MultiArray< 4, Value, Device, Index >::reset()
 {
    this->dimensions = Containers::StaticVector< 4, Index >( ( Index ) 0 );
-   Array< Element, Device, Index >::reset();
+   Array< Value, Device, Index >::reset();
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-void MultiArray< 4, Element, Device, Index > :: getDimensions( Index& lSize,
+void MultiArray< 4, Value, Device, Index > :: getDimensions( Index& lSize,
                                                                        Index& kSize,
                                                                        Index& jSize,
                                                                        Index& iSize ) const
@@ -121,16 +121,16 @@ void MultiArray< 4, Element, Device, Index > :: getDimensions( Index& lSize,
    lSize = this->dimensions[ 3 ];
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Containers::StaticVector< 4, Index >& MultiArray< 4, Element, Device, Index > :: getDimensions() const
+const Containers::StaticVector< 4, Index >& MultiArray< 4, Value, Device, Index > :: getDimensions() const
 {
    return this->dimensions;
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Index MultiArray< 4, Element, Device, Index > :: getElementIndex( const Index l,
+Index MultiArray< 4, Value, Device, Index > :: getElementIndex( const Index l,
                                                                      const Index k,
                                                                      const Index j,
                                                                      const Index i ) const
@@ -147,95 +147,95 @@ Index MultiArray< 4, Element, Device, Index > :: getElementIndex( const Index l,
    return ( ( l * this->dimensions[ 2 ] + k ) * this->dimensions[ 1 ]  + j ) * this->dimensions[ 0 ] + i;
 }
 
-template< typename Element, typename Device, typename Index >
-Element MultiArray< 4, Element, Device, Index > :: getElement( const Index l,
+template< typename Value, typename Device, typename Index >
+Value MultiArray< 4, Value, Device, Index > :: getElement( const Index l,
                                                                        const Index k,
                                                                        const Index j,
                                                                        const Index i ) const
 {
-   return Array< Element, Device, Index > :: getElement( getElementIndex( l, k, j, i ) );
+   return Array< Value, Device, Index > :: getElement( getElementIndex( l, k, j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
-void MultiArray< 4, Element, Device, Index > :: setElement( const Index l,
+template< typename Value, typename Device, typename Index >
+void MultiArray< 4, Value, Device, Index > :: setElement( const Index l,
                                                                     const Index k,
                                                                     const Index j,
-                                                                    const Index i, Element value )
+                                                                    const Index i, Value value )
 {
-   Array< Element, Device, Index > :: setElement( getElementIndex( l, k, j, i ), value );
+   Array< Value, Device, Index > :: setElement( getElementIndex( l, k, j, i ), value );
 }
 
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-Element& MultiArray< 4, Element, Device, Index > :: operator()( const Index l,
+Value& MultiArray< 4, Value, Device, Index > :: operator()( const Index l,
                                                                         const Index k,
                                                                         const Index j,
                                                                         const Index i )
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( l, k, j, i ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( l, k, j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
 __cuda_callable__
-const Element& MultiArray< 4, Element, Device, Index > :: operator()( const Index l,
+const Value& MultiArray< 4, Value, Device, Index > :: operator()( const Index l,
                                                                                const Index k,
                                                                                const Index j,
                                                                                const Index i ) const
 {
-   return Array< Element, Device, Index > :: operator[]( getElementIndex( l, k, j, i ) );
+   return Array< Value, Device, Index > :: operator[]( getElementIndex( l, k, j, i ) );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 4, Element, Device, Index > :: operator == ( const MultiArrayT& array ) const
+bool MultiArray< 4, Value, Device, Index > :: operator == ( const MultiArrayT& array ) const
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to compare two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   return Array< Element, Device, Index > :: operator == ( array );
+   return Array< Value, Device, Index > :: operator == ( array );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 4, Element, Device, Index > :: operator != ( const MultiArrayT& array ) const
+bool MultiArray< 4, Value, Device, Index > :: operator != ( const MultiArrayT& array ) const
 {
    return ! ( (* this ) == array );
 }
 
-template< typename Element, typename Device, typename Index >
-MultiArray< 4, Element, Device, Index >&
-   MultiArray< 4, Element, Device, Index > :: operator = ( const MultiArray< 4, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+MultiArray< 4, Value, Device, Index >&
+   MultiArray< 4, Value, Device, Index > :: operator = ( const MultiArray< 4, Value, Device, Index >& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
+template< typename Value, typename Device, typename Index >
    template< typename MultiArrayT >
-MultiArray< 4, Element, Device, Index >&
-   MultiArray< 4, Element, Device, Index > :: operator = ( const MultiArrayT& array )
+MultiArray< 4, Value, Device, Index >&
+   MultiArray< 4, Value, Device, Index > :: operator = ( const MultiArrayT& array )
 {
    // TODO: Static assert on dimensions
    TNL_ASSERT( this->getDimensions() == array. getDimensions(),
               std::cerr << "You are attempting to assign two arrays with different dimensions." << std::endl
                    << "First array dimensions are ( " << this->getDimensions() << " )" << std::endl
                    << "Second array dimensions are ( " << array. getDimensions() << " )" << std::endl; );
-   Array< Element, Device, Index > :: operator = ( array );
+   Array< Value, Device, Index > :: operator = ( array );
    return ( *this );
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 4, Element, Device, Index > :: save( File& file ) const
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 4, Value, Device, Index > :: save( File& file ) const
 {
-   if( ! Array< Element, Device, Index > :: save( file ) )
+   if( ! Array< Value, Device, Index > :: save( file ) )
    {
       std::cerr << "I was not able to write the Array of MultiArray." << std::endl;
       return false;
@@ -248,10 +248,10 @@ bool MultiArray< 4, Element, Device, Index > :: save( File& file ) const
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-bool MultiArray< 4, Element, Device, Index > :: load( File& file )
+template< typename Value, typename Device, typename Index >
+bool MultiArray< 4, Value, Device, Index > :: load( File& file )
 {
-   if( ! Array< Element, Device, Index > :: load( file ) )
+   if( ! Array< Value, Device, Index > :: load( file ) )
    {
       std::cerr << "I was not able to read the Array of MultiArray." << std::endl;
       return false;
@@ -264,8 +264,8 @@ bool MultiArray< 4, Element, Device, Index > :: load( File& file )
    return true;
 }
 
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const MultiArray< 4, Element, Device, Index >& array )
+template< typename Value, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MultiArray< 4, Value, Device, Index >& array )
 {
    for( Index l = 0; l < array. getDimensions()[ 3 ]; l ++ )
    {
diff --git a/src/TNL/Containers/MultiArray_impl.cpp b/src/TNL/Containers/MultiArray_impl.cpp
deleted file mode 100644
index 392c89cb8802233041f5f644e6b3275063fb1c0e..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/MultiArray_impl.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/***************************************************************************
-                          MultiArray_impl.cpp  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/MultiArray.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 1, float,  Devices::Host, int >;
-#endif
-template class MultiArray< 1, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 1, float,  Devices::Host, long int >;
-#endif
-template class MultiArray< 1, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 2, float,  Devices::Host, int >;
-#endif
-template class MultiArray< 2, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 2, float,  Devices::Host, long int >;
-#endif
-template class MultiArray< 2, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 3, float,  Devices::Host, int >;
-#endif
-template class MultiArray< 3, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 3, float,  Devices::Host, long int >;
-#endif
-template class MultiArray< 3, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 4, float,  Devices::Host, int >;
-#endif
-template class MultiArray< 4, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 4, float,  Devices::Host, long int >;
-#endif
-template class MultiArray< 4, double, Devices::Host, long int >;
-#endif
-
-#ifndef HAVE_CUDA
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 1, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 1, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 1, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 1, double, Devices::Cuda, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 2, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 2, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 2, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 2, double, Devices::Cuda, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 3, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 3, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 3, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 3, double, Devices::Cuda, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 4, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 4, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 4, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 4, double, Devices::Cuda, long int >;
-#endif
-
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
-
-
diff --git a/src/TNL/Containers/MultiArray_impl.cu b/src/TNL/Containers/MultiArray_impl.cu
deleted file mode 100644
index a2d20449e9e37fd81cd80bd06c10744d648d8111..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/MultiArray_impl.cu
+++ /dev/null
@@ -1,68 +0,0 @@
-/***************************************************************************
-                          MultiArray_impl.cu  -  description
-                             -------------------
-    begin                : Feb 4, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/MultiArray.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 1, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 1, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 1, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 1, double, Devices::Cuda, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 2, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 2, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 2, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 2, double, Devices::Cuda, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 3, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 3, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 3, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 3, double, Devices::Cuda, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 4, float,  Devices::Cuda, int >;
-#endif
-template class MultiArray< 4, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiArray< 4, float,  Devices::Cuda, long int >;
-#endif
-template class MultiArray< 4, double, Devices::Cuda, long int >;
-#endif
-
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/MultiVector1D_impl.h b/src/TNL/Containers/MultiVector1D_impl.h
index eb0e3109f636fbe5c45f396dc63a9e3deb8d0561..e8f42c5391774360f6af595c6e47d2b329e7c1de 100644
--- a/src/TNL/Containers/MultiVector1D_impl.h
+++ b/src/TNL/Containers/MultiVector1D_impl.h
@@ -227,33 +227,5 @@ bool MultiVector< 1, Real, Device, Index > :: load( const String& fileName )
    return Object :: load( fileName );
 }
 
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 1, float,  Devices::Host, int >;
-#endif
-extern template class MultiVector< 1, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 1, float,  Devices::Host, long int >;
-#endif
-extern template class MultiVector< 1, double, Devices::Host, long int >;
-#endif
-
-#ifdef HAVE_CUDA
-/*#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 1, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiVector< 1, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 1, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiVector< 1, double, Devices::Cuda, long int >;
-#endif*/
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/MultiVector2D_impl.h b/src/TNL/Containers/MultiVector2D_impl.h
index 7eb483e9ed122ecff6a6e4d67ce2d9d25d06fb15..2c39f22226a9ad5f070505f144e1811ac19e18ca 100644
--- a/src/TNL/Containers/MultiVector2D_impl.h
+++ b/src/TNL/Containers/MultiVector2D_impl.h
@@ -238,33 +238,5 @@ std::ostream& operator << ( std::ostream& str, const MultiVector< 2, Real, Devic
    return str;
 }
 
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 2, float,  Devices::Host, int >;
-#endif
-extern template class MultiVector< 2, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 2, float,  Devices::Host, long int >;
-#endif
-extern template class MultiVector< 2, double, Devices::Host, long int >;
-#endif
-
-#ifdef HAVE_CUDA
-/*#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 2, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiVector< 2, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 2, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiVector< 2, double, Devices::Cuda, long int >;
-#endif*/
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/MultiVector3D_impl.h b/src/TNL/Containers/MultiVector3D_impl.h
index 24d92f0e262a32e77b8c375f07f13bb40742d5e5..cd4e543cec48c96464c2775d5ad971791f6ae221 100644
--- a/src/TNL/Containers/MultiVector3D_impl.h
+++ b/src/TNL/Containers/MultiVector3D_impl.h
@@ -262,33 +262,5 @@ bool MultiVector< 3, Real, Device, Index > :: load( const String& fileName )
    return Object :: load( fileName );
 }
 
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 3, float,  Devices::Host, int >;
-#endif
-extern template class MultiVector< 3, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 3, float,  Devices::Host, long int >;
-#endif
-extern template class MultiVector< 3, double, Devices::Host, long int >;
-#endif
-
-#ifdef HAVE_CUDA
-/*#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 3, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiVector< 3, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 3, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiVector< 3, double, Devices::Cuda, long int >;
-#endif*/
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/MultiVector4D_impl.h b/src/TNL/Containers/MultiVector4D_impl.h
index f9c883371f53439e9921280a844cb6c21175ff19..6fe5a4a062eca65d83adfe10537b79d77af5f449 100644
--- a/src/TNL/Containers/MultiVector4D_impl.h
+++ b/src/TNL/Containers/MultiVector4D_impl.h
@@ -283,33 +283,5 @@ bool MultiVector< 4, Real, Device, Index > :: load( const String& fileName )
    return Object :: load( fileName );
 }
 
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 4, float,  Devices::Host, int >;
-#endif
-extern template class MultiVector< 4, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 4, float,  Devices::Host, long int >;
-#endif
-extern template class MultiVector< 4, double, Devices::Host, long int >;
-#endif
-
-#ifdef HAVE_CUDA
-/*#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 4, float,  Devices::Cuda, int >;
-#endif
-extern template class MultiVector< 4, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class MultiVector< 4, float,  Devices::Cuda, long int >;
-#endif
-extern template class MultiVector< 4, double, Devices::Cuda, long int >;
-#endif*/
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/MultiVector_impl.cpp b/src/TNL/Containers/MultiVector_impl.cpp
deleted file mode 100644
index 0f26bf4c255281389838ba49ebf45900c53d358d..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/MultiVector_impl.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/***************************************************************************
-                          MultiVector_impl.cpp  -  description
-                             -------------------
-    begin                : Jan 21, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/MultiVector.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 1, float,  Devices::Host, int >;
-#endif
-template class MultiVector< 1, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 1, float,  Devices::Host, long int >;
-#endif
-template class MultiVector< 1, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 2, float,  Devices::Host, int >;
-#endif
-template class MultiVector< 2, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 2, float,  Devices::Host, long int >;
-#endif
-template class MultiVector< 2, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 3, float,  Devices::Host, int >;
-#endif
-template class MultiVector< 3, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 3, float,  Devices::Host, long int >;
-#endif
-template class MultiVector< 3, double, Devices::Host, long int >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 4, float,  Devices::Host, int >;
-#endif
-template class MultiVector< 4, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 4, float,  Devices::Host, long int >;
-#endif
-template class MultiVector< 4, double, Devices::Host, long int >;
-#endif
-
-#ifdef HAVE_CUDA
-
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 1, float,  Devices::Cuda, int >;
-#endif
-template class MultiVector< 1, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 1, float,  Devices::Cuda, long int >;
-#endif
-template class MultiVector< 1, double, Devices::Cuda, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 2, float,  Devices::Cuda, int >;
-#endif
-template class MultiVector< 2, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 2, float,  Devices::Cuda, long int >;
-#endif
-template class MultiVector< 2, double, Devices::Cuda, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 3, float,  Devices::Cuda, int >;
-#endif
-template class MultiVector< 3, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 3, float,  Devices::Cuda, long int >;
-#endif
-template class MultiVector< 3, double, Devices::Cuda, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 4, float,  Devices::Cuda, int >;
-#endif
-template class MultiVector< 4, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class MultiVector< 4, float,  Devices::Cuda, long int >;
-#endif
-template class MultiVector< 4, double, Devices::Cuda, long int >;
-#endif
-
-#endif
-
-#endif
-} // namespace Containers
-} // namespace TNL
-
-
diff --git a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h
index b459b660d9303a7d66c81dc3aa17a6df74a0466c..6ac890a7026676b811d530df03474206670ea460 100644
--- a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h
+++ b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/DevicePointer.h>
+#include <TNL/Pointers/DevicePointer.h>
 #include <TNL/ParallelFor.h>
 
 namespace TNL {
@@ -43,8 +43,8 @@ void permuteMultimapKeys( Multimap& multimap, const PermutationVector& perm )
       destValues = srcValues;
    };
 
-   DevicePointer< Multimap > multimapPointer( multimap );
-   DevicePointer< Multimap > multimapCopyPointer( multimapCopy );
+   Pointers::DevicePointer< Multimap > multimapPointer( multimap );
+   Pointers::DevicePointer< Multimap > multimapCopyPointer( multimapCopy );
 
    ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(),
                                     kernel,
@@ -76,7 +76,7 @@ void permuteMultimapValues( Multimap& multimap, const PermutationVector& iperm )
          values[ v ] = iperm[ values[ v ] ];
    };
 
-   DevicePointer< Multimap > multimapPointer( multimap );
+   Pointers::DevicePointer< Multimap > multimapPointer( multimap );
    ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(),
                                     kernel,
                                     &multimapPointer.template modifyData< DeviceType >(),
diff --git a/src/TNL/Containers/SharedArray.h b/src/TNL/Containers/SharedArray.h
deleted file mode 100644
index 4feaf7862bd7fc35a17e35e69a6230466d0cdf49..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedArray.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/***************************************************************************
-                          SharedArray.h  -  description
-                             -------------------
-    begin                : Nov 7, 2012
-    copyright            : (C) 2012 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once 
-
-#include <TNL/Object.h>
-#include <TNL/Devices/Cuda.h>
-
-// Forward declarations
-namespace TNL {
-   class File;
-
-namespace Devices {   
-   class Host;
-   class Cuda;
-}
-
-namespace Containers {   
-
-
-template< typename Element, typename Device, typename Index >
-class Array;
-
-template< int Size, typename Element >
-class StaticArray;
-
-template< typename Element,
-          typename Device = Devices::Host,
-          typename Index = int >
-class SharedArray : public Object
-{
-   public:
-
-   typedef Element ElementType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef SharedArray< Element, Devices::Host, Index > HostType;
-   typedef SharedArray< Element, Devices::Cuda, Index > CudaType;
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedArray();
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedArray( Element* _data,
-                   const Index _size );
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedArray( Array< Element, Device, Index >& array );
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedArray( SharedArray< Element, Device, Index >& array );
-
-   static String getType();
-
-   String getTypeVirtual() const;
-
-   static String getSerializationType();
-
-   virtual String getSerializationTypeVirtual() const;
-
-   __cuda_callable__
-   void bind( Element* _data,
-              const Index _size );
-
-   template< typename Array >
-   __cuda_callable__
-   void bind( Array& array,
-              IndexType index = 0,
-              IndexType size = 0 );
-
-   template< int Size >
-   __cuda_callable__
-   void bind( StaticArray< Size, Element >& array );
-
-   __cuda_callable__
-   void bind( SharedArray< Element, Device, Index >& array );
-
-   void swap( SharedArray< Element, Device, Index >& array );
-
-   void reset();
-
-   __cuda_callable__ Index getSize() const;
-
-   void setElement( const Index& i, const Element& x );
-
-   Element getElement( const Index& i ) const;
-
-   __cuda_callable__ Element& operator[] ( const Index& i );
-
-   __cuda_callable__ const Element& operator[] ( const Index& i ) const;
-
-   SharedArray< Element, Device, Index >& operator = ( const SharedArray< Element, Device, Index >& array );
-
-   template< typename Array >
-   SharedArray< Element, Device, Index >& operator = ( const Array& array );
-
-   template< typename Array >
-   bool operator == ( const Array& array ) const;
-
-   template< typename Array >
-   bool operator != ( const Array& array ) const;
-
-   void setValue( const Element& e );
-
-   __cuda_callable__ const Element* getData() const;
-
-   __cuda_callable__ Element* getData();
-
-
-   /*!
-    * Returns true if non-zero size is set.
-    */
-   operator bool() const;
-
-   //! This method measures data transfers done by this vector.
-   /*!
-    * Every time one touches this grid touches * size * sizeof( Real ) bytes are added
-    * to transfered bytes in tnlStatistics.
-    */
-   template< typename IndexType2 = Index >
-   void touch( IndexType2 touches = 1 ) const;
-
-   //! Method for saving the object to a file as a binary data.
-   bool save( File& file ) const;
-
-   bool save( const String& fileName ) const;
-
-   bool load( File& file );
-
-   bool load( const String& fileName );
-
-   protected:
-
-   //!Number of allocated elements
-   Index size;
-
-   //! Pointer to allocated data
-   Element* data;
-};
-
-
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const SharedArray< Element, Device, Index >& v );
-
-} // namespace Containers
-} // namespace TNL
-
-#include <TNL/Containers/SharedArray_impl.h>
diff --git a/src/TNL/Containers/SharedArray_impl.cpp b/src/TNL/Containers/SharedArray_impl.cpp
deleted file mode 100644
index fb12eda8f7180ed3a90571793dd3fafbc5c77a27..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedArray_impl.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/***************************************************************************
-                          SharedArray_impl.cpp  -  description
-                             -------------------
-    begin                : Mar 18, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/SharedArray.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class SharedArray< float, Devices::Host, int >;
-#endif
-template class SharedArray< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedArray< long double, Devices::Host, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class SharedArray< float, Devices::Host, long int >;
-#endif
-template class SharedArray< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedArray< long double, Devices::Host, long int >;
-#endif
-#endif
-
-/*#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class SharedArray< float, Devices::Cuda, int >;
-#endif
-template class SharedArray< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedArray< long double, Devices::Cuda, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class SharedArray< float, Devices::Cuda, long int >;
-#endif
-template class SharedArray< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedArray< long double, Devices::Cuda, long int >;
-#endif
-#endif
-#endif*/
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
-
diff --git a/src/TNL/Containers/SharedArray_impl.cu b/src/TNL/Containers/SharedArray_impl.cu
deleted file mode 100644
index 5a3c026aefaee1d8368e0fce2c815df7e7c73aea..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedArray_impl.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/***************************************************************************
-                          SharedArray_impl.cu  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/SharedArray.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class SharedArray< float, Devices::Cuda, int >;
-#endif
-template class SharedArray< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedArray< long double, Devices::Cuda, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class SharedArray< float, Devices::Cuda, long int >;
-#endif
-template class SharedArray< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedArray< long double, Devices::Cuda, long int >;
-#endif
-#endif
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/SharedArray_impl.h b/src/TNL/Containers/SharedArray_impl.h
deleted file mode 100644
index 44eb48a02177e01f4ed553fab33f5bfa6dd40100..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedArray_impl.h
+++ /dev/null
@@ -1,470 +0,0 @@
-/***************************************************************************
-                          SharedArray.h  -  description
-                             -------------------
-    begin                : Nov 7, 2012
-    copyright            : (C) 2012 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <iostream>
-#include <TNL/File.h>
-#include <TNL/Containers/Array.h>
-#include <TNL/Containers/StaticArray.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-#include <TNL/Math.h>
-#include <TNL/param-types.h>
-
-namespace TNL {
-namespace Containers {   
-
-
-template< typename Element,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedArray< Element, Device, Index >::SharedArray()
-: size( 0 ), data( 0 )
-{
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedArray< Element, Device, Index >::SharedArray( Element* _data,
-                                                          const Index _size )
-{
-   this->bind( _data, _size );
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedArray< Element, Device, Index >::SharedArray( Array< Element, Device, Index >& array )
-{
-   this->bind( array );
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedArray< Element, Device, Index >::SharedArray( SharedArray< Element, Device, Index >& array )
-{
-   this->bind( array );
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-String SharedArray< Element, Device, Index > :: getType()
-{
-   return String( "Containers::SharedArray< " ) + ", " +
-                    TNL::getType< Element >() + ", " +
-                     Device::getDeviceType() + ", " +
-                    TNL::getType< Index >() + " >";
-};
-
-template< typename Element,
-           typename Device,
-           typename Index >
-String SharedArray< Element, Device, Index > :: getTypeVirtual() const
-{
-   return this->getType();
-};
-
-template< typename Element,
-           typename Device,
-           typename Index >
-String
-SharedArray< Element, Device, Index >::
-getSerializationType()
-{
-   return HostType::getType();
-};
-
-template< typename Element,
-           typename Device,
-           typename Index >
-String
-SharedArray< Element, Device, Index >::
-getSerializationTypeVirtual() const
-{
-   return this->getSerializationType();
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__
-void SharedArray< Element, Device, Index > :: bind( Element* data,
-                                                       const Index size )
-{
-   TNL_ASSERT( size >= 0,
-              std::cerr << "You try to set size of SharedArray to negative value."
-                        << "New size: " << size << std::endl );
-   TNL_ASSERT( data != 0,
-              std::cerr << "You try to use null pointer to data for SharedArray." );
-
-   this->size = size;
-   this->data = data;
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-   template< typename Array >
-__cuda_callable__
-void SharedArray< Element, Device, Index > :: bind( Array& array,
-                                                       IndexType index,
-                                                       IndexType size )
-{
-   //tnlStaticTNL_ASSERT( Array::DeviceType::DeviceType == DeviceType::DeviceType,
-   //                 "Attempt to bind arrays between different devices." );
-   // TODO: fix this - it does nto work with StaticArray
-   this->data = &( array. getData()[ index ] );
-   if( ! size )
-      this->size = array. getSize();
-   else
-      this->size = size;
- 
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-   template< int Size >
-__cuda_callable__
-void SharedArray< Element, Device, Index >::bind( StaticArray< Size, Element >& array )
-{
-   this->size = Size;
-   this->data = array.getData();
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__
-void SharedArray< Element, Device, Index > :: bind( SharedArray< Element, Device, Index >& array )
-{
-   this->size = array. getSize();
-   this->data = array. getData();
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-void SharedArray< Element, Device, Index > :: swap( SharedArray< Element, Device, Index >& array )
-{
-   TNL::swap( this->size, array. size );
-   TNL::swap( this->data, array. data );
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-void SharedArray< Element, Device, Index > :: reset()
-{
-   this->size = 0;
-   this->data = 0;
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Index SharedArray< Element, Device, Index > :: getSize() const
-{
-   return this->size;
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-void SharedArray< Element, Device, Index > :: setElement( const Index& i, const Element& x )
-{
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for setElement method in SharedArray "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
-   return Algorithms::ArrayOperations< Device >::setMemoryElement( & ( this->data[ i ] ), x );
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-Element SharedArray< Element, Device, Index > :: getElement( const Index& i ) const
-{
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for getElement method in SharedArray "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
-   return Algorithms::ArrayOperations< Device >::getMemoryElement( &( this->data[ i ] ) );
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Element& SharedArray< Element, Device, Index > :: operator[] ( const Index& i )
-{
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for operator[] in SharedArray "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
-   return this->data[ i ];
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__
-const Element& SharedArray< Element, Device, Index > :: operator[] ( const Index& i ) const
-{
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for operator[] in SharedArray "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
-   return this->data[ i ];
-};
-
-template< typename Element,
-           typename Device,
-           typename Index >
-SharedArray< Element, Device, Index >&
-    SharedArray< Element, Device, Index > :: operator = ( const SharedArray< Element, Device, Index >& array )
-{
-   TNL_ASSERT( array. getSize() == this->getSize(),
-              std::cerr << "Source size: " << array. getSize() << std::endl
-                        << "Target size: " << this->getSize() << std::endl );
-   Algorithms::ArrayOperations< Device > ::
-      template copyMemory< Element,
-                           Element,
-                           Index >
-                          ( this->getData(),
-                            array. getData(),
-                            array. getSize() );
-   return ( *this );
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-   template< typename Array >
-SharedArray< Element, Device, Index >& SharedArray< Element, Device, Index > :: operator = ( const Array& array )
-{
-   TNL_ASSERT( array. getSize() == this->getSize(),
-              std::cerr << "Source size: " << array. getSize() << std::endl
-                        << "Target size: " << this->getSize() << std::endl );
-   Algorithms::ArrayOperations< typename Array::DeviceType, Device >::
-      template copyMemory< Element,
-                           typename Array :: ElementType,
-                           typename Array :: IndexType >
-                         ( this->getData(),
-                           array. getData(),
-                           array. getSize() );
-   return ( *this );
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-   template< typename Array >
-bool SharedArray< Element, Device, Index > :: operator == ( const Array& array ) const
-{
-   if( array. getSize() != this->getSize() )
-      return false;
-   return Algorithms::ArrayOperations< Device, typename Array::DeviceType >::
-      template compareMemory< typename Array :: ElementType,
-                              Element,
-                              typename Array :: IndexType >
-                            ( this->getData(),
-                              array. getData(),
-                              array. getSize() );
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-   template< typename Array >
-bool SharedArray< Element, Device, Index > :: operator != ( const Array& array ) const
-{
-   return ! ( ( *this ) == array );
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-void SharedArray< Element, Device, Index > :: setValue( const Element& e )
-{
-   TNL_ASSERT( this->size != 0, );
-   Algorithms::ArrayOperations< Device >::template setMemory< Element, Index >
-                              ( this->getData(), e, this->getSize() );
-
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__ 
-const Element* SharedArray< Element, Device, Index > :: getData() const
-{
-   return this->data;
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-__cuda_callable__ 
-Element* SharedArray< Element, Device, Index > :: getData()
-{
-   return this->data;
-}
-
-template< typename Element,
-          typename Device,
-          typename Index >
-SharedArray< Element, Device, Index > :: operator bool() const
-{
-   return data != 0;
-};
-
-
-template< typename Element,
-          typename Device,
-          typename Index >
-   template< typename IndexType2 >
-void SharedArray< Element, Device, Index > :: touch( IndexType2 touches ) const
-{
-   //TODO: implement
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-bool SharedArray< Element, Device, Index > :: save( File& file ) const
-{
-   TNL_ASSERT( this->size != 0,
-              std::cerr << "You try to save empty array." << std::endl );
-   if( ! Object :: save( file ) )
-      return false;
-   if( ! file. write( &this->size ) )
-      return false;
-   if( ! file. write< Element, Device, Index >( this->data, this->size ) )
-   {
-      std::cerr << "I was not able to WRITE SharedArray with size " << this->getSize() << std::endl;
-      return false;
-   }
-   return true;
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-bool SharedArray< Element, Device, Index > :: save( const String& fileName ) const
-{
-   return Object :: save( fileName );
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-bool SharedArray< Element, Device, Index > :: load( File& file )
-{
-   if( ! Object :: load( file ) )
-      return false;
-   Index _size;
-   if( ! file. read( &_size, 1 ) )
-      return false;
-   if( _size != this->size )
-   {
-      std::cerr << "Error: The size " << _size << " of the data to be load is different from the " <<
-                   "allocated array. This is not possible in the shared array." << std::endl;
-      return false;
-   }
-   if( _size )
-   {
-      if( ! file. read< Element, Device, Index >( this->data, this->size ) )
-      {
-         std::cerr << "I was not able to READ SharedArray with size " << this->getSize() << std::endl;
-         return false;
-      }
-   }
-   return true;
-};
-
-template< typename Element,
-          typename Device,
-          typename Index >
-bool SharedArray< Element, Device, Index > :: load( const String& fileName )
-{
-   return Object :: load( fileName );
-};
-
-
-template< typename Element, typename Device, typename Index >
-std::ostream& operator << ( std::ostream& str, const SharedArray< Element, Device, Index >& v )
-{
-   str << "[ ";
-   if( v.getSize() > 0 )
-   {
-      str << v.getElement( 0 );
-      for( Index i = 1; i < v.getSize(); i++ )
-         str << ", " << v. getElement( i );
-   }
-   str << " ]";
-   return str;
-}
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-// TODO: this does not work with CUDA 5.5 - fix it later
-
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedArray< float, Devices::Host, int >;
-#endif
-extern template class SharedArray< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedArray< long double, Devices::Host, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedArray< float, Devices::Host, long int >;
-#endif
-extern template class SharedArray< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedArray< long double, Devices::Host, long int >;
-#endif
-#endif
-
-
-#ifdef HAVE_CUDA
-/*
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedArray< float, Devices::Cuda, int >;
-#endif
-extern template class SharedArray< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedArray< float, Devices::Cuda, long int >;
-#endif
-extern template class SharedArray< double, Devices::Cuda, long int >;*/
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/SharedVector.h b/src/TNL/Containers/SharedVector.h
deleted file mode 100644
index 6526f1b4b7aac8241325f70a866a803b20e98158..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedVector.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/***************************************************************************
-                          SharedVector.h  -  description
-                             -------------------
-    begin                : Nov 7, 2012
-    copyright            : (C) 2012 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once 
-
-#include <TNL/Containers/SharedArray.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Functions/Domain.h>
-
-namespace TNL {
-
-namespace Devices
-{
-   class Host;
-}
-
-namespace Containers {   
-
-
-
-template< typename Real = double,
-           typename Device= Devices::Host,
-           typename Index = int >
-class SharedVector : public Containers::SharedArray< Real, Device, Index >
-{
-   public:
-
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef SharedVector< Real, Devices::Host, Index > HostType;
-   typedef SharedVector< Real, Devices::Cuda, Index > CudaType;
-
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedVector();
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedVector( Real* data,
-                    const Index size );
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedVector( Vector< Real, Device, Index >& vector );
-
-   #ifndef HAVE_MIC
-   __cuda_callable__
-   #endif
-   SharedVector( SharedVector< Real, Device, Index >& vector );
-
-   static String getType();
-
-   String getTypeVirtual() const;
-
-   static String getSerializationType();
-
-   virtual String getSerializationTypeVirtual() const;
-
-   void addElement( const IndexType i,
-                    const RealType& value );
-
-   void addElement( const IndexType i,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator );
-
-   SharedVector< Real, Device, Index >& operator = ( const SharedVector< Real, Device, Index >& array );
-
-   template< typename Vector >
-   SharedVector< Real, Device, Index >& operator = ( const Vector& array );
-
-   template< typename Vector >
-   bool operator == ( const Vector& array ) const;
-
-   template< typename Vector >
-   bool operator != ( const Vector& array ) const;
-
-   template< typename Vector >
-   SharedVector< Real, Device, Index >& operator -= ( const Vector& vector );
-
-   template< typename Vector >
-   SharedVector< Real, Device, Index >& operator += ( const Vector& vector );
- 
-   SharedVector< Real, Device, Index >& operator *= ( const RealType& c );
- 
-   SharedVector< Real, Device, Index >& operator /= ( const RealType& c );
-
-   //bool save( File& file ) const;
-
-   //bool save( const String& fileName ) const;
-
-   Real max() const;
-
-   Real min() const;
-
-   Real absMax() const;
-
-   Real absMin() const;
-
-   Real lpNorm( const Real& p ) const;
-
-   Real sum() const;
-
-   template< typename Vector >
-   Real differenceMax( const Vector& v ) const;
-
-   template< typename Vector >
-   Real differenceMin( const Vector& v ) const;
-
-   template< typename Vector >
-   Real differenceAbsMax( const Vector& v ) const;
-
-   template< typename Vector >
-   Real differenceAbsMin( const Vector& v ) const;
-
-   template< typename Vector >
-   Real differenceLpNorm( const Vector& v, const Real& p ) const;
-
-   template< typename Vector >
-   Real differenceSum( const Vector& v ) const;
-
-   void scalarMultiplication( const Real& alpha );
-
-   //! Computes scalar dot product
-   template< typename Vector >
-   Real scalarProduct( const Vector& v );
-
-   //! Computes Y = alpha * X + Y.
-   template< typename Vector >
-   void addVector( const Vector& x,
-                   const Real& alpha = 1.0,
-                   const Real& thisMultiplicator = 1.0 );
-
-   //! Computes this = thisMultiplicator * this + multiplicator1 * v1 + multiplicator2 * v2.
-   template< typename Vector >
-   void addVectors( const Vector& v1,
-                    const Real& multiplicator1,
-                    const Vector& v2,
-                    const Real& multiplicator2,
-                    const Real& thisMultiplicator = 1.0 );
-
-   void computePrefixSum();
-
-   void computePrefixSum( const IndexType begin, const IndexType end );
-
-   void computeExclusivePrefixSum();
-
-   void computeExclusivePrefixSum( const IndexType begin, const IndexType end );
-
-};
-
-} // namespace Containers
-} // namespace TNL
-
-#include <TNL/Containers/SharedVector_impl.h>
-
diff --git a/src/TNL/Containers/SharedVector_impl.cpp b/src/TNL/Containers/SharedVector_impl.cpp
deleted file mode 100644
index cb9915a8378b1e1aabcbe4a0969e358f0aa8dd05..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedVector_impl.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/***************************************************************************
-                          SharedVector_impl.cpp  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/SharedVector.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class SharedVector< float, Devices::Host, int >;
-#endif
-template class SharedVector< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedVector< long double, Devices::Host, int >;
-#endif
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class SharedVector< float, Devices::Host, long int >;
-#endif
-template class SharedVector< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedVector< long double, Devices::Host, long int >;
-#endif
-#endif
-
-#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class SharedVector< float, Devices::Cuda, int >;
-#endif
-template class SharedVector< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedVector< long double, Devices::Cuda, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class SharedVector< float, Devices::Cuda, long int >;
-#endif
-template class SharedVector< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class SharedVector< long double, Devices::Cuda, long int >;
-#endif
-#endif
-
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
diff --git a/src/TNL/Containers/SharedVector_impl.h b/src/TNL/Containers/SharedVector_impl.h
deleted file mode 100644
index 446612f133408a6560b2e89dcaf60e4d35259ed6..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/SharedVector_impl.h
+++ /dev/null
@@ -1,425 +0,0 @@
-/***************************************************************************
-                          SharedVector.h  -  description
-                             -------------------
-    begin                : Nov 8, 2012
-    copyright            : (C) 2012 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Containers/SharedVector.h>
-#include <TNL/Containers/Algorithms/VectorOperations.h>
-
-namespace TNL {
-namespace Containers {   
-
-template< typename Real,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedVector< Real, Device, Index >::SharedVector()
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedVector< Real, Device, Index >::SharedVector( Real* data,
-                                                         const Index size )
-: Containers::SharedArray< Real, Device, Index >( data, size )
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedVector< Real, Device, Index >::SharedVector( Vector< Real, Device, Index >& vector )
-: Containers::SharedArray< Real, Device, Index >( vector )
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-#ifndef HAVE_MIC
-__cuda_callable__
-#endif
-SharedVector< Real, Device, Index >::SharedVector( SharedVector< Real, Device, Index >& vector )
-: Containers::SharedArray< Real, Device, Index >( vector )
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String SharedVector< Real, Device, Index > :: getType()
-{
-   return String( "Containers::SharedVector< " ) +
-                    TNL::getType< Real >() + ", " +
-                     Device :: getDeviceType() + ", " +
-                    TNL::getType< Index >() + " >";
-};
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String SharedVector< Real, Device, Index > :: getTypeVirtual() const
-{
-   return this->getType();
-};
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String SharedVector< Real, Device, Index > :: getSerializationType()
-{
-   return Vector< Real, Devices::Host, Index >::getType();
-};
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String SharedVector< Real, Device, Index > :: getSerializationTypeVirtual() const
-{
-   return this->getSerializationType();
-};
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index >::addElement( const IndexType i,
-                                                         const RealType& value )
-{
-   Algorithms::VectorOperations< Device >::addElement( *this, i, value );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index >::addElement( const IndexType i,
-                                                         const RealType& value,
-                                                         const RealType& thisElementMultiplicator )
-{
-   Algorithms::VectorOperations< Device >::addElement( *this, i, value, thisElementMultiplicator );
-}
-
-template< typename Real,
-           typename Device,
-           typename Index >
-SharedVector< Real, Device, Index >&
-   SharedVector< Real, Device, Index > :: operator = ( const SharedVector< Real, Device, Index >& vector )
-{
-   Containers::SharedArray< Real, Device, Index > :: operator = ( vector );
-   return ( *this );
-};
-
-template< typename Real,
-           typename Device,
-           typename Index >
-   template< typename Vector >
-SharedVector< Real, Device, Index >&
-   SharedVector< Real, Device, Index > :: operator = ( const Vector& vector )
-{
-   Containers::SharedArray< Real, Device, Index > :: operator = ( vector );
-   return ( *this );
-};
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Vector >
-bool SharedVector< Real, Device, Index > :: operator == ( const Vector& vector ) const
-{
-   return Containers::SharedArray< Real, Device, Index > :: operator == ( vector );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Vector >
-bool SharedVector< Real, Device, Index > :: operator != ( const Vector& vector ) const
-{
-   return Containers::SharedArray< Real, Device, Index > :: operator != ( vector );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Vector >
-SharedVector< Real, Device, Index >& SharedVector< Real, Device, Index > :: operator -= ( const Vector& vector )
-{
-   this->addVector( vector, -1.0 );
-   return ( *this );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Vector >
-SharedVector< Real, Device, Index >& SharedVector< Real, Device, Index > :: operator += ( const Vector& vector )
-{
-   this->addVector( vector );
-   return ( *this );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-SharedVector< Real, Device, Index >& SharedVector< Real, Device, Index > :: operator *= ( const RealType& c )
-{
-   Algorithms::VectorOperations< Device >::vectorScalarMultiplication( *this, c );
-   return *this;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-SharedVector< Real, Device, Index >& SharedVector< Real, Device, Index > :: operator /= ( const RealType& c )
-{
-   Algorithms::VectorOperations< Device >::vectorScalarMultiplication( *this, 1.0/ c );
-   return *this;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real SharedVector< Real, Device, Index > :: max() const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorMax( *this );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real SharedVector< Real, Device, Index > :: min() const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorMin( *this );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real SharedVector< Real, Device, Index > :: absMax() const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorAbsMax( *this );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real SharedVector< Real, Device, Index > :: absMin() const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorAbsMin( *this );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real SharedVector< Real, Device, Index > :: lpNorm( const Real& p ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorLpNorm( *this, p );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real SharedVector< Real, Device, Index > :: sum() const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorSum( *this );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: differenceMax( const Vector& v ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorDifferenceMax( *this, v );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: differenceMin( const Vector& v ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorDifferenceMin( *this, v );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: differenceAbsMax( const Vector& v ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorDifferenceAbsMax( *this, v );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: differenceAbsMin( const Vector& v ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorDifferenceAbsMin( *this, v );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: differenceLpNorm( const Vector& v, const Real& p ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorDifferenceLpNorm( *this, v, p );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: differenceSum( const Vector& v ) const
-{
-   return Algorithms::VectorOperations< Device > :: getVectorDifferenceSum( *this, v );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index > :: scalarMultiplication( const Real& alpha )
-{
-   Algorithms::VectorOperations< Device > :: vectorScalarMultiplication( *this, alpha );
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-Real SharedVector< Real, Device, Index > :: scalarProduct( const Vector& v )
-{
-   return Algorithms::VectorOperations< Device > :: getScalarProduct( *this, v );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-void SharedVector< Real, Device, Index > :: addVector( const Vector& x,
-                                                          const Real& alpha,
-                                                          const Real& thisMultiplicator )
-{
-   Algorithms::VectorOperations< Device > :: addVector( *this, x, alpha, thisMultiplicator );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Vector >
-void
-SharedVector< Real, Device, Index >::
-addVectors( const Vector& v1,
-            const Real& multiplicator1,
-            const Vector& v2,
-            const Real& multiplicator2,
-            const Real& thisMultiplicator )
-{
-   Algorithms::VectorOperations< Device >::addVectors( *this, v1, multiplicator1, v2, multiplicator2, thisMultiplicator );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index > :: computePrefixSum()
-{
-   Algorithms::VectorOperations< Device >::computePrefixSum( *this, 0, this->getSize() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index > :: computePrefixSum( const IndexType begin,
-                                                                 const IndexType end )
-{
-   Algorithms::VectorOperations< Device >::computePrefixSum( *this, begin, end );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index > :: computeExclusivePrefixSum()
-{
-   Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, 0, this->getSize() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void SharedVector< Real, Device, Index > :: computeExclusivePrefixSum( const IndexType begin,
-                                                                          const IndexType end )
-{
-   Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end );
-}
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedVector< float, Devices::Host, int >;
-#endif
-extern template class SharedVector< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedVector< long double, Devices::Host, int >;
-#endif
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedVector< float, Devices::Host, long int >;
-#endif
-extern template class SharedVector< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedVector< long double, Devices::Host, long int >;
-#endif
-#endif
-
-#ifdef HAVE_CUDA
-// TODO: fix this - it does not work with CUDA 5.5
-/*
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedVector< float, Devices::Cuda, int >;
-#endif
-extern template class SharedVector< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedVector< long double, Devices::Cuda, int >;
-#endif
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class SharedVector< float, Devices::Cuda, long int >;
-#endif
-extern template class SharedVector< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class SharedVector< long double, Devices::Cuda, long int >;
-#endif
- #endif
- */
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h
index 32f4528dbf5aaedf1c9561c849e42747bd5f1e4a..2766fe5221a900442025dd7679b57c2eccae201a 100644
--- a/src/TNL/Containers/StaticArray.h
+++ b/src/TNL/Containers/StaticArray.h
@@ -16,11 +16,11 @@
 namespace TNL {
 namespace Containers {   
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 class StaticArray
 {
    public:
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef int     IndexType;
    enum { size = Size };
 
@@ -31,15 +31,15 @@ class StaticArray
    // reference: https://stackoverflow.com/q/4610503
    template< typename _unused = void >
    __cuda_callable__
-   inline StaticArray( const Element v[ Size ] );
+   inline StaticArray( const Value v[ Size ] );
 
    //! This sets all vector components to v
    __cuda_callable__
-   inline StaticArray( const Element& v );
+   inline StaticArray( const Value& v );
 
    //! Copy constructor
    __cuda_callable__
-   inline StaticArray( const StaticArray< Size, Element >& v );
+   inline StaticArray( const StaticArray< Size, Value >& v );
 
    static String getType();
 
@@ -47,23 +47,23 @@ class StaticArray
    inline int getSize() const;
 
    __cuda_callable__
-   inline Element* getData();
+   inline Value* getData();
 
    __cuda_callable__
-   inline const Element* getData() const;
+   inline const Value* getData() const;
 
    __cuda_callable__
-   inline const Element& operator[]( int i ) const;
+   inline const Value& operator[]( int i ) const;
 
    __cuda_callable__
-   inline Element& operator[]( int i );
+   inline Value& operator[]( int i );
 
    __cuda_callable__
-   inline StaticArray< Size, Element >& operator = ( const StaticArray< Size, Element >& array );
+   inline StaticArray< Size, Value >& operator = ( const StaticArray< Size, Value >& array );
 
    template< typename Array >
    __cuda_callable__
-   inline StaticArray< Size, Element >& operator = ( const Array& array );
+   inline StaticArray< Size, Value >& operator = ( const Array& array );
 
    template< typename Array >
    __cuda_callable__
@@ -73,12 +73,12 @@ class StaticArray
    __cuda_callable__
    inline bool operator != ( const Array& array ) const;
  
-   template< typename OtherElement >
+   template< typename OtherValue >
    __cuda_callable__
-   operator StaticArray< Size, OtherElement >() const;
+   operator StaticArray< Size, OtherValue >() const;
 
    __cuda_callable__
-   inline void setValue( const ElementType& val );
+   inline void setValue( const ValueType& val );
 
    bool save( File& file ) const;
 
@@ -89,14 +89,14 @@ class StaticArray
    std::ostream& write( std::ostream& str, const char* separator = " " ) const;
 
    protected:
-   Element data[ Size ];
+   Value data[ Size ];
 };
 
-template< typename Element >
-class StaticArray< 1, Element >
+template< typename Value >
+class StaticArray< 1, Value >
 {
    public:
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef int     IndexType;
    enum { size = 1 };
 
@@ -107,14 +107,14 @@ class StaticArray< 1, Element >
    // reference: https://stackoverflow.com/q/4610503
    template< typename _unused = void >
    __cuda_callable__
-   inline StaticArray( const Element v[ size ] );
+   inline StaticArray( const Value v[ size ] );
 
    __cuda_callable__
-   inline StaticArray( const Element& v );
+   inline StaticArray( const Value& v );
 
    //! Copy constructor
    __cuda_callable__
-   inline StaticArray( const StaticArray< size, Element >& v );
+   inline StaticArray( const StaticArray< size, Value >& v );
 
    static String getType();
 
@@ -122,31 +122,31 @@ class StaticArray< 1, Element >
    inline int getSize() const;
 
    __cuda_callable__
-   inline Element* getData();
+   inline Value* getData();
 
    __cuda_callable__
-   inline const Element* getData() const;
+   inline const Value* getData() const;
 
    __cuda_callable__
-   inline const Element& operator[]( int i ) const;
+   inline const Value& operator[]( int i ) const;
 
    __cuda_callable__
-   inline Element& operator[]( int i );
+   inline Value& operator[]( int i );
 
    //! Returns the first coordinate
    __cuda_callable__
-   inline Element& x();
+   inline Value& x();
 
    //! Returns the first coordinate
    __cuda_callable__
-   inline const Element& x() const;
+   inline const Value& x() const;
 
    __cuda_callable__
-   inline StaticArray< 1, Element >& operator = ( const StaticArray< 1, Element >& array );
+   inline StaticArray< 1, Value >& operator = ( const StaticArray< 1, Value >& array );
 
    template< typename Array >
    __cuda_callable__
-   inline StaticArray< 1, Element >& operator = ( const Array& array );
+   inline StaticArray< 1, Value >& operator = ( const Array& array );
 
    template< typename Array >
    __cuda_callable__
@@ -156,13 +156,13 @@ class StaticArray< 1, Element >
    __cuda_callable__
    inline bool operator != ( const Array& array ) const;
  
-   template< typename OtherElement >
+   template< typename OtherValue >
    __cuda_callable__
-   operator StaticArray< 1, OtherElement >() const;
+   operator StaticArray< 1, OtherValue >() const;
 
    __cuda_callable__
    inline
-   void setValue( const ElementType& val );
+   void setValue( const ValueType& val );
 
    bool save( File& file ) const;
 
@@ -173,14 +173,14 @@ class StaticArray< 1, Element >
    std::ostream& write( std::ostream& str, const char* separator = " " ) const;
 
    protected:
-   Element data[ size ];
+   Value data[ size ];
 };
 
-template< typename Element >
-class StaticArray< 2, Element >
+template< typename Value >
+class StaticArray< 2, Value >
 {
    public:
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef int     IndexType;
    enum { size = 2 };
 
@@ -191,18 +191,18 @@ class StaticArray< 2, Element >
    // reference: https://stackoverflow.com/q/4610503
    template< typename _unused = void >
    __cuda_callable__
-   inline StaticArray( const Element v[ size ] );
+   inline StaticArray( const Value v[ size ] );
 
    //! This sets all vector components to v
    __cuda_callable__
-   inline StaticArray( const Element& v );
+   inline StaticArray( const Value& v );
 
    __cuda_callable__
-   inline StaticArray( const Element& v1, const Element& v2 );
+   inline StaticArray( const Value& v1, const Value& v2 );
 
    //! Copy constructor
    __cuda_callable__
-   inline StaticArray( const StaticArray< size, Element >& v );
+   inline StaticArray( const StaticArray< size, Value >& v );
 
    static String getType();
 
@@ -210,39 +210,39 @@ class StaticArray< 2, Element >
    inline int getSize() const;
 
    __cuda_callable__
-   inline Element* getData();
+   inline Value* getData();
 
    __cuda_callable__
-   inline const Element* getData() const;
+   inline const Value* getData() const;
 
    __cuda_callable__
-   inline const Element& operator[]( int i ) const;
+   inline const Value& operator[]( int i ) const;
 
    __cuda_callable__
-   inline Element& operator[]( int i );
+   inline Value& operator[]( int i );
 
    //! Returns the first coordinate
    __cuda_callable__
-   inline Element& x();
+   inline Value& x();
 
    //! Returns the first coordinate
    __cuda_callable__
-   inline const Element& x() const;
+   inline const Value& x() const;
 
    //! Returns the second coordinate
    __cuda_callable__
-   inline Element& y();
+   inline Value& y();
 
    //! Returns the second coordinate
    __cuda_callable__
-   inline const Element& y() const;
+   inline const Value& y() const;
 
    __cuda_callable__
-   inline StaticArray< 2, Element >& operator = ( const StaticArray< 2, Element >& array );
+   inline StaticArray< 2, Value >& operator = ( const StaticArray< 2, Value >& array );
 
    template< typename Array >
    __cuda_callable__
-   inline StaticArray< 2, Element >& operator = ( const Array& array );
+   inline StaticArray< 2, Value >& operator = ( const Array& array );
 
    template< typename Array >
    __cuda_callable__
@@ -252,12 +252,12 @@ class StaticArray< 2, Element >
    __cuda_callable__
    inline bool operator != ( const Array& array ) const;
  
-   template< typename OtherElement >
+   template< typename OtherValue >
    __cuda_callable__
-   operator StaticArray< 2, OtherElement >() const;
+   operator StaticArray< 2, OtherValue >() const;
  
    __cuda_callable__
-   inline void setValue( const ElementType& val );
+   inline void setValue( const ValueType& val );
 
    bool save( File& file ) const;
 
@@ -268,14 +268,14 @@ class StaticArray< 2, Element >
    std::ostream& write( std::ostream& str, const char* separator = " " ) const;
 
    protected:
-   Element data[ size ];
+   Value data[ size ];
 };
 
-template< typename Element >
-class StaticArray< 3, Element >
+template< typename Value >
+class StaticArray< 3, Value >
 {
    public:
-   typedef Element ElementType;
+   typedef Value ValueType;
    typedef int     IndexType;
    enum { size = 3 };
 
@@ -286,18 +286,18 @@ class StaticArray< 3, Element >
    // reference: https://stackoverflow.com/q/4610503
    template< typename _unused = void >
    __cuda_callable__
-   inline StaticArray( const Element v[ size ] );
+   inline StaticArray( const Value v[ size ] );
 
    //! This sets all vector components to v
    __cuda_callable__
-   inline StaticArray( const Element& v );
+   inline StaticArray( const Value& v );
 
    __cuda_callable__
-   inline StaticArray( const Element& v1, const Element& v2, const Element& v3 );
+   inline StaticArray( const Value& v1, const Value& v2, const Value& v3 );
 
    //! Copy constructor
    __cuda_callable__
-   inline StaticArray( const StaticArray< size, Element >& v );
+   inline StaticArray( const StaticArray< size, Value >& v );
 
    static String getType();
 
@@ -305,47 +305,47 @@ class StaticArray< 3, Element >
    inline int getSize() const;
 
    __cuda_callable__
-   inline Element* getData();
+   inline Value* getData();
 
    __cuda_callable__
-   inline const Element* getData() const;
+   inline const Value* getData() const;
 
    __cuda_callable__
-   inline const Element& operator[]( int i ) const;
+   inline const Value& operator[]( int i ) const;
 
    __cuda_callable__
-   inline Element& operator[]( int i );
+   inline Value& operator[]( int i );
 
    //! Returns the first coordinate
    __cuda_callable__
-   inline Element& x();
+   inline Value& x();
 
    //! Returns the first coordinate
    __cuda_callable__
-   inline const Element& x() const;
+   inline const Value& x() const;
 
    //! Returns the second coordinate
    __cuda_callable__
-   inline Element& y();
+   inline Value& y();
 
    //! Returns the second coordinate
    __cuda_callable__
-   inline const Element& y() const;
+   inline const Value& y() const;
 
    //! Returns the third coordinate
    __cuda_callable__
-   inline Element& z();
+   inline Value& z();
 
    //! Returns the third coordinate
    __cuda_callable__
-   inline const Element& z() const;
+   inline const Value& z() const;
 
    __cuda_callable__
-   inline StaticArray< 3, Element >& operator = ( const StaticArray< 3, Element >& array );
+   inline StaticArray< 3, Value >& operator = ( const StaticArray< 3, Value >& array );
 
    template< typename Array >
    __cuda_callable__
-   inline StaticArray< 3, Element >& operator = ( const Array& array );
+   inline StaticArray< 3, Value >& operator = ( const Array& array );
 
    template< typename Array >
    __cuda_callable__
@@ -355,12 +355,12 @@ class StaticArray< 3, Element >
    __cuda_callable__
    inline bool operator != ( const Array& array ) const;
  
-   template< typename OtherElement >
+   template< typename OtherValue >
    __cuda_callable__
-   operator StaticArray< 3, OtherElement >() const;
+   operator StaticArray< 3, OtherValue >() const;
 
    __cuda_callable__
-   inline void setValue( const ElementType& val );
+   inline void setValue( const ValueType& val );
 
    bool save( File& file ) const;
 
@@ -371,11 +371,11 @@ class StaticArray< 3, Element >
    std::ostream& write( std::ostream& str, const char* separator = " " ) const;
 
    protected:
-   Element data[ size ];
+   Value data[ size ];
 };
 
-template< int Size, typename Element >
-std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Element >& a );
+template< int Size, typename Value >
+std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Value >& a );
 
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticArray1D_impl.h b/src/TNL/Containers/StaticArray1D_impl.h
index a23849cca338b609ad65a3c68d99d0c712ccf099..3fd54145053bffb24e2ab4219e694102853b3165 100644
--- a/src/TNL/Containers/StaticArray1D_impl.h
+++ b/src/TNL/Containers/StaticArray1D_impl.h
@@ -16,153 +16,153 @@
 namespace TNL {
 namespace Containers {   
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 1, Element >::StaticArray()
+inline StaticArray< 1, Value >::StaticArray()
 {
 }
 
-template< typename Element >
+template< typename Value >
    template< typename _unused >
 __cuda_callable__
-inline StaticArray< 1, Element >::StaticArray( const Element v[ size ] )
+inline StaticArray< 1, Value >::StaticArray( const Value v[ size ] )
 {
    data[ 0 ] = v[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 1, Element >::StaticArray( const Element& v )
+inline StaticArray< 1, Value >::StaticArray( const Value& v )
 {
    data[ 0 ] = v;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 1, Element >::StaticArray( const StaticArray< size, Element >& v )
+inline StaticArray< 1, Value >::StaticArray( const StaticArray< size, Value >& v )
 {
    data[ 0 ] = v[ 0 ];
 }
 
-template< typename Element >
-String StaticArray< 1, Element >::getType()
+template< typename Value >
+String StaticArray< 1, Value >::getType()
 {
    return String( "Containers::StaticArray< " ) +
           String( size ) +
           String( ", " ) +
-          TNL::getType< Element >() +
+          TNL::getType< Value >() +
           String( " >" );
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline int StaticArray< 1, Element >::getSize() const
+inline int StaticArray< 1, Value >::getSize() const
 {
    return size;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element* StaticArray< 1, Element >::getData()
+inline Value* StaticArray< 1, Value >::getData()
 {
    return data;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element* StaticArray< 1, Element >::getData() const
+inline const Value* StaticArray< 1, Value >::getData() const
 {
    return data;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 1, Element >::operator[]( int i ) const
+inline const Value& StaticArray< 1, Value >::operator[]( int i ) const
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 1, Element >::operator[]( int i )
+inline Value& StaticArray< 1, Value >::operator[]( int i )
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 1, Element >::x()
+inline Value& StaticArray< 1, Value >::x()
 {
    return data[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 1, Element >::x() const
+inline const Value& StaticArray< 1, Value >::x() const
 {
    return data[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 1, Element >& StaticArray< 1, Element >::operator = ( const StaticArray< 1, Element >& array )
+inline StaticArray< 1, Value >& StaticArray< 1, Value >::operator = ( const StaticArray< 1, Value >& array )
 {
    data[ 0 ] = array[ 0 ];
    return *this;
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-inline StaticArray< 1, Element >& StaticArray< 1, Element >::operator = ( const Array& array )
+inline StaticArray< 1, Value >& StaticArray< 1, Value >::operator = ( const Array& array )
 {
    data[ 0 ] = array[ 0 ];
    return *this;
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-inline bool StaticArray< 1, Element >::operator == ( const Array& array ) const
+inline bool StaticArray< 1, Value >::operator == ( const Array& array ) const
 {
    return( ( int ) size == ( int ) Array::size && data[ 0 ] == array[ 0 ] );
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-inline bool StaticArray< 1, Element >::operator != ( const Array& array ) const
+inline bool StaticArray< 1, Value >::operator != ( const Array& array ) const
 {
    return ! this->operator == ( array );
 }
 
-template< typename Element >
-   template< typename OtherElement >
+template< typename Value >
+   template< typename OtherValue >
 __cuda_callable__
-StaticArray< 1, Element >::
-operator StaticArray< 1, OtherElement >() const
+StaticArray< 1, Value >::
+operator StaticArray< 1, OtherValue >() const
 {
-   StaticArray< 1, OtherElement > aux;
+   StaticArray< 1, OtherValue > aux;
    aux[ 0 ] = data[ 0 ];
    return aux;
 }
 
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline void StaticArray< 1, Element >::setValue( const ElementType& val )
+inline void StaticArray< 1, Value >::setValue( const ValueType& val )
 {
    data[ 0 ] = val;
 }
 
-template< typename Element >
-bool StaticArray< 1, Element >::save( File& file ) const
+template< typename Value >
+bool StaticArray< 1, Value >::save( File& file ) const
 {
-   if( ! file. write< Element, Devices::Host, int >( data, size ) )
+   if( ! file. write< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to write " << getType() << "." << std::endl;
       return false;
@@ -170,8 +170,8 @@ bool StaticArray< 1, Element >::save( File& file ) const
    return true;
 }
 
-template< typename Element >
-bool StaticArray< 1, Element >::load( File& file)
+template< typename Value >
+bool StaticArray< 1, Value >::load( File& file)
 {
    if( ! file.read( data, size ) )
    {
@@ -181,38 +181,17 @@ bool StaticArray< 1, Element >::load( File& file)
    return true;
 }
 
-template< typename Element >
-void StaticArray< 1, Element >::sort()
+template< typename Value >
+void StaticArray< 1, Value >::sort()
 {
 }
 
-template< typename Element >
-std::ostream& StaticArray< 1, Element >::write( std::ostream& str, const char* separator ) const
+template< typename Value >
+std::ostream& StaticArray< 1, Value >::write( std::ostream& str, const char* separator ) const
 {
    str << data[ 0 ];
    return str;
 }
 
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-// TODO: it does not work with CUDA
-
-#ifndef HAVE_CUDA
-extern template class StaticArray< 1, char >;
-extern template class StaticArray< 1, int >;
-#ifdef INSTANTIATE_LONG_INT
-extern template class StaticArray< 1, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticArray< 1, float >;
-#endif
-extern template class StaticArray< 1, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticArray< 1, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticArray2D_impl.h b/src/TNL/Containers/StaticArray2D_impl.h
index 44ed90b0a6c42f5d9ef28cc2a72959a8c17e586e..841c89529aa55571f0446eb2a7d0895b21aecf69 100644
--- a/src/TNL/Containers/StaticArray2D_impl.h
+++ b/src/TNL/Containers/StaticArray2D_impl.h
@@ -17,182 +17,182 @@
 namespace TNL {
 namespace Containers {   
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 2, Element >::StaticArray()
+inline StaticArray< 2, Value >::StaticArray()
 {
 }
 
-template< typename Element >
+template< typename Value >
    template< typename _unused >
 __cuda_callable__
-inline StaticArray< 2, Element >::StaticArray( const Element v[ size ] )
+inline StaticArray< 2, Value >::StaticArray( const Value v[ size ] )
 {
    data[ 0 ] = v[ 0 ];
    data[ 1 ] = v[ 1 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 2, Element >::StaticArray( const Element& v )
+inline StaticArray< 2, Value >::StaticArray( const Value& v )
 {
    data[ 0 ] = v;
    data[ 1 ] = v;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 2, Element >::StaticArray( const Element& v1, const Element& v2 )
+inline StaticArray< 2, Value >::StaticArray( const Value& v1, const Value& v2 )
 {
    data[ 0 ] = v1;
    data[ 1 ] = v2;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 2, Element >::StaticArray( const StaticArray< size, Element >& v )
+inline StaticArray< 2, Value >::StaticArray( const StaticArray< size, Value >& v )
 {
    data[ 0 ] = v[ 0 ];
    data[ 1 ] = v[ 1 ];
 }
 
-template< typename Element >
-String StaticArray< 2, Element >::getType()
+template< typename Value >
+String StaticArray< 2, Value >::getType()
 {
    return String( "Containers::StaticArray< " ) +
           String( size ) +
           String( ", " ) +
-          TNL::getType< Element >() +
+          TNL::getType< Value >() +
           String( " >" );
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline int StaticArray< 2, Element >::getSize() const
+inline int StaticArray< 2, Value >::getSize() const
 {
    return size;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element* StaticArray< 2, Element >::getData()
+inline Value* StaticArray< 2, Value >::getData()
 {
    return data;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element* StaticArray< 2, Element >::getData() const
+inline const Value* StaticArray< 2, Value >::getData() const
 {
    return data;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 2, Element >::operator[]( int i ) const
+inline const Value& StaticArray< 2, Value >::operator[]( int i ) const
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 2, Element >::operator[]( int i )
+inline Value& StaticArray< 2, Value >::operator[]( int i )
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 2, Element >::x()
+inline Value& StaticArray< 2, Value >::x()
 {
    return data[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 2, Element >::x() const
+inline const Value& StaticArray< 2, Value >::x() const
 {
    return data[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 2, Element >::y()
+inline Value& StaticArray< 2, Value >::y()
 {
    return data[ 1 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 2, Element >::y() const
+inline const Value& StaticArray< 2, Value >::y() const
 {
    return data[ 1 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 2, Element >& StaticArray< 2, Element >::operator = ( const StaticArray< 2, Element >& array )
+inline StaticArray< 2, Value >& StaticArray< 2, Value >::operator = ( const StaticArray< 2, Value >& array )
 {
    data[ 0 ] = array[ 0 ];
    data[ 1 ] = array[ 1 ];
    return *this;
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-inline StaticArray< 2, Element >& StaticArray< 2, Element >::operator = ( const Array& array )
+inline StaticArray< 2, Value >& StaticArray< 2, Value >::operator = ( const Array& array )
 {
    data[ 0 ] = array[ 0 ];
    data[ 1 ] = array[ 1 ];
    return *this;
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-inline bool StaticArray< 2, Element >::operator == ( const Array& array ) const
+inline bool StaticArray< 2, Value >::operator == ( const Array& array ) const
 {
    return( ( int ) size == ( int ) Array::size &&
            data[ 0 ] == array[ 0 ] &&
            data[ 1 ] == array[ 1 ] );
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-inline bool StaticArray< 2, Element >::operator != ( const Array& array ) const
+inline bool StaticArray< 2, Value >::operator != ( const Array& array ) const
 {
    return ! this->operator == ( array );
 }
 
-template< typename Element >
-   template< typename OtherElement >
+template< typename Value >
+   template< typename OtherValue >
 __cuda_callable__
-StaticArray< 2, Element >::
-operator StaticArray< 2, OtherElement >() const
+StaticArray< 2, Value >::
+operator StaticArray< 2, OtherValue >() const
 {
-   StaticArray< 2, OtherElement > aux;
+   StaticArray< 2, OtherValue > aux;
    aux[ 0 ] = data[ 0 ];
    aux[ 1 ] = data[ 1 ];
    return aux;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline void StaticArray< 2, Element >::setValue( const ElementType& val )
+inline void StaticArray< 2, Value >::setValue( const ValueType& val )
 {
    data[ 1 ] = data[ 0 ] = val;
 }
 
-template< typename Element >
-bool StaticArray< 2, Element >::save( File& file ) const
+template< typename Value >
+bool StaticArray< 2, Value >::save( File& file ) const
 {
-   if( ! file. write< Element, Devices::Host, int >( data, size ) )
+   if( ! file. write< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to write " << getType() << "." << std::endl;
       return false;
@@ -200,10 +200,10 @@ bool StaticArray< 2, Element >::save( File& file ) const
    return true;
 }
 
-template< typename Element >
-bool StaticArray< 2, Element >::load( File& file)
+template< typename Value >
+bool StaticArray< 2, Value >::load( File& file)
 {
-   if( ! file.read< Element, Devices::Host, int >( data, size ) )
+   if( ! file.read< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to read " << getType() << "." << std::endl;
       return false;
@@ -211,40 +211,19 @@ bool StaticArray< 2, Element >::load( File& file)
    return true;
 }
 
-template< typename Element >
-void StaticArray< 2, Element >::sort()
+template< typename Value >
+void StaticArray< 2, Value >::sort()
 {
    if( data[ 0 ] > data[ 1 ] )
       swap( data[ 0 ], data[ 1 ] );
 }
 
-template< typename Element >
-std::ostream& StaticArray< 2, Element >::write( std::ostream& str, const char* separator ) const
+template< typename Value >
+std::ostream& StaticArray< 2, Value >::write( std::ostream& str, const char* separator ) const
 {
    str << data[ 0 ] << separator << data[ 1 ];
    return str;
 }
 
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-// TODO: it does not work with CUDA
-
-#ifndef HAVE_CUDA
-extern template class StaticArray< 2, char >;
-extern template class StaticArray< 2, int >;
-#ifdef INSTANTIATE_LONG_INT
-extern template class StaticArray< 2, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticArray< 2, float >;
-#endif
-extern template class StaticArray< 2, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticArray< 2, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticArray3D_impl.h b/src/TNL/Containers/StaticArray3D_impl.h
index 4e89783ca84dd6279930cd92f21526a34aa54cb3..1ed6d2c155f7e0d5d170d12ba8c426adca41b52d 100644
--- a/src/TNL/Containers/StaticArray3D_impl.h
+++ b/src/TNL/Containers/StaticArray3D_impl.h
@@ -17,142 +17,142 @@
 namespace TNL {
 namespace Containers {   
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 3, Element >::StaticArray()
+inline StaticArray< 3, Value >::StaticArray()
 {
 }
 
-template< typename Element >
+template< typename Value >
    template< typename _unused >
 __cuda_callable__
-inline StaticArray< 3, Element >::StaticArray( const Element v[ size ] )
+inline StaticArray< 3, Value >::StaticArray( const Value v[ size ] )
 {
    data[ 0 ] = v[ 0 ];
    data[ 1 ] = v[ 1 ];
    data[ 2 ] = v[ 2 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 3, Element >::StaticArray( const Element& v )
+inline StaticArray< 3, Value >::StaticArray( const Value& v )
 {
    data[ 0 ] = v;
    data[ 1 ] = v;
    data[ 2 ] = v;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 3, Element >::StaticArray( const Element& v1, const Element& v2, const Element& v3 )
+inline StaticArray< 3, Value >::StaticArray( const Value& v1, const Value& v2, const Value& v3 )
 {
    data[ 0 ] = v1;
    data[ 1 ] = v2;
    data[ 2 ] = v3;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline StaticArray< 3, Element >::StaticArray( const StaticArray< size, Element >& v )
+inline StaticArray< 3, Value >::StaticArray( const StaticArray< size, Value >& v )
 {
    data[ 0 ] = v[ 0 ];
    data[ 1 ] = v[ 1 ];
    data[ 2 ] = v[ 2 ];
 }
 
-template< typename Element >
-String StaticArray< 3, Element >::getType()
+template< typename Value >
+String StaticArray< 3, Value >::getType()
 {
    return String( "Containers::StaticArray< " ) +
           String( size ) +
           String( ", " ) +
-          TNL::getType< Element >() +
+          TNL::getType< Value >() +
           String( " >" );
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline int StaticArray< 3, Element >::getSize() const
+inline int StaticArray< 3, Value >::getSize() const
 {
    return size;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element* StaticArray< 3, Element >::getData()
+inline Value* StaticArray< 3, Value >::getData()
 {
    return data;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element* StaticArray< 3, Element >::getData() const
+inline const Value* StaticArray< 3, Value >::getData() const
 {
    return data;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 3, Element >::operator[]( int i ) const
+inline const Value& StaticArray< 3, Value >::operator[]( int i ) const
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 3, Element >::operator[]( int i )
+inline Value& StaticArray< 3, Value >::operator[]( int i )
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 3, Element >::x()
+inline Value& StaticArray< 3, Value >::x()
 {
    return data[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 3, Element >::x() const
+inline const Value& StaticArray< 3, Value >::x() const
 {
    return data[ 0 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 3, Element >::y()
+inline Value& StaticArray< 3, Value >::y()
 {
    return data[ 1 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 3, Element >::y() const
+inline const Value& StaticArray< 3, Value >::y() const
 {
    return data[ 1 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline Element& StaticArray< 3, Element >::z()
+inline Value& StaticArray< 3, Value >::z()
 {
    return data[ 2 ];
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-inline const Element& StaticArray< 3, Element >::z() const
+inline const Value& StaticArray< 3, Value >::z() const
 {
    return data[ 2 ];
 }
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-StaticArray< 3, Element >& StaticArray< 3, Element >::operator = ( const StaticArray< 3, Element >& array )
+StaticArray< 3, Value >& StaticArray< 3, Value >::operator = ( const StaticArray< 3, Value >& array )
 {
    data[ 0 ] = array[ 0 ];
    data[ 1 ] = array[ 1 ];
@@ -160,10 +160,10 @@ StaticArray< 3, Element >& StaticArray< 3, Element >::operator = ( const StaticA
    return *this;
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-StaticArray< 3, Element >& StaticArray< 3, Element >::operator = ( const Array& array )
+StaticArray< 3, Value >& StaticArray< 3, Value >::operator = ( const Array& array )
 {
    data[ 0 ] = array[ 0 ];
    data[ 1 ] = array[ 1 ];
@@ -171,10 +171,10 @@ StaticArray< 3, Element >& StaticArray< 3, Element >::operator = ( const Array&
    return *this;
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-bool StaticArray< 3, Element >::operator == ( const Array& array ) const
+bool StaticArray< 3, Value >::operator == ( const Array& array ) const
 {
    return( ( int ) size == ( int ) Array::size &&
            data[ 0 ] == array[ 0 ] &&
@@ -182,38 +182,38 @@ bool StaticArray< 3, Element >::operator == ( const Array& array ) const
            data[ 2 ] == array[ 2 ] );
 }
 
-template< typename Element >
+template< typename Value >
    template< typename Array >
 __cuda_callable__
-bool StaticArray< 3, Element >::operator != ( const Array& array ) const
+bool StaticArray< 3, Value >::operator != ( const Array& array ) const
 {
    return ! this->operator == ( array );
 }
 
-template< typename Element >
-   template< typename OtherElement >
+template< typename Value >
+   template< typename OtherValue >
 __cuda_callable__
-StaticArray< 3, Element >::
-operator StaticArray< 3, OtherElement >() const
+StaticArray< 3, Value >::
+operator StaticArray< 3, OtherValue >() const
 {
-   StaticArray< 3, OtherElement > aux;
+   StaticArray< 3, OtherValue > aux;
    aux[ 0 ] = data[ 0 ];
    aux[ 1 ] = data[ 1 ];
    aux[ 2 ] = data[ 2 ];
    return aux;
 }
 
-template< typename Element >
+template< typename Value >
 __cuda_callable__
-void StaticArray< 3, Element >::setValue( const ElementType& val )
+void StaticArray< 3, Value >::setValue( const ValueType& val )
 {
    data[ 2 ] = data[ 1 ] = data[ 0 ] = val;
 }
 
-template< typename Element >
-bool StaticArray< 3, Element >::save( File& file ) const
+template< typename Value >
+bool StaticArray< 3, Value >::save( File& file ) const
 {
-   if( ! file. write< Element, Devices::Host, int >( data, size ) )
+   if( ! file. write< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to write " << getType() << "." << std::endl;
       return false;
@@ -221,10 +221,10 @@ bool StaticArray< 3, Element >::save( File& file ) const
    return true;
 }
 
-template< typename Element >
-bool StaticArray< 3, Element >::load( File& file)
+template< typename Value >
+bool StaticArray< 3, Value >::load( File& file)
 {
-   if( ! file.read< Element, Devices::Host, int >( data, size ) )
+   if( ! file.read< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to read " << getType() << "." << std::endl;
       return false;
@@ -232,8 +232,8 @@ bool StaticArray< 3, Element >::load( File& file)
    return true;
 }
 
-template< typename Element >
-void StaticArray< 3, Element >::sort()
+template< typename Value >
+void StaticArray< 3, Value >::sort()
 {
    /****
     * Bubble sort on three elements
@@ -246,34 +246,12 @@ void StaticArray< 3, Element >::sort()
       swap( data[ 0 ], data[ 1 ] );
 }
 
-template< typename Element >
-std::ostream& StaticArray< 3, Element >::write( std::ostream& str, const char* separator ) const
+template< typename Value >
+std::ostream& StaticArray< 3, Value >::write( std::ostream& str, const char* separator ) const
 {
    str << data[ 0 ] << separator << data[ 1 ] << separator << data[ 2 ];
    return str;
 }
 
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-// TODO: it does not work with CUDA
-
-#ifndef HAVE_CUDA
-extern template class StaticArray< 3, char >;
-extern template class StaticArray< 3, int >;
-#ifdef INSTANTIATE_LONG_INT
-extern template class StaticArray< 3, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticArray< 3, float >;
-#endif
-extern template class StaticArray< 3, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticArray< 3, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticArray_impl.cpp b/src/TNL/Containers/StaticArray_impl.cpp
deleted file mode 100644
index 1da438f8404e1cfc3605bb103bc2c649596ee6b7..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/StaticArray_impl.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/***************************************************************************
-                          StaticArray_impl.cpp  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/StaticArray.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifndef HAVE_CUDA
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-template class StaticArray< 1, char >;
-template class StaticArray< 1, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 1, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 1, float >;
-#endif
-template class StaticArray< 1, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 1, long double >;
-#endif
-
-template class StaticArray< 2, char >;
-template class StaticArray< 2, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 2, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 2, float >;
-#endif
-template class StaticArray< 2, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 2, long double >;
-#endif
-
-template class StaticArray< 3, char >;
-template class StaticArray< 3, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 3, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 3, float >;
-#endif
-template class StaticArray< 3, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 3, long double >;
-#endif
-
-template class StaticArray< 4, char >;
-template class StaticArray< 4, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 4, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 4, float >;
-#endif
-template class StaticArray< 4, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 4, long double >;
-#endif
-
-#endif
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
diff --git a/src/TNL/Containers/StaticArray_impl.cu b/src/TNL/Containers/StaticArray_impl.cu
deleted file mode 100644
index 5d3ab6b60c5e687a7e46cc2d4eb5a42a347bcb33..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/StaticArray_impl.cu
+++ /dev/null
@@ -1,75 +0,0 @@
-/***************************************************************************
-                          StaticArray_impl.cu  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/StaticArray.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifndef HAVE_CUDA
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-template class StaticArray< 1, char >;
-template class StaticArray< 1, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 1, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 1, float >;
-#endif
-template class StaticArray< 1, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 1, long double >;
-#endif
-
-template class StaticArray< 2, char >;
-template class StaticArray< 2, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 2, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 2, float >;
-#endif
-template class StaticArray< 2, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 2, long double >;
-#endif
-
-template class StaticArray< 3, char >;
-template class StaticArray< 3, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 3, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 3, float >;
-#endif
-template class StaticArray< 3, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 3, long double >;
-#endif
-
-template class StaticArray< 4, char >;
-template class StaticArray< 4, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class StaticArray< 4, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-template class StaticArray< 4, float >;
-#endif
-template class StaticArray< 4, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticArray< 4, long double >;
-#endif
-
-#endif
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/StaticArray_impl.h b/src/TNL/Containers/StaticArray_impl.h
index 9be36764a5b8e316900db466fa1a284fcf0ac4ab..898bc9bd5625ba73b77c015d0472337a0eeda81f 100644
--- a/src/TNL/Containers/StaticArray_impl.h
+++ b/src/TNL/Containers/StaticArray_impl.h
@@ -17,109 +17,109 @@
 namespace TNL {
 namespace Containers {   
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline StaticArray< Size, Element >::StaticArray()
+inline StaticArray< Size, Value >::StaticArray()
 {
 };
 
-template< int Size, typename Element >
+template< int Size, typename Value >
    template< typename _unused >
 __cuda_callable__
-inline StaticArray< Size, Element >::StaticArray( const Element v[ Size ] )
+inline StaticArray< Size, Value >::StaticArray( const Value v[ Size ] )
 {
    for( int i = 0; i < Size; i++ )
       data[ i ] = v[ i ];
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline StaticArray< Size, Element >::StaticArray( const Element& v )
+inline StaticArray< Size, Value >::StaticArray( const Value& v )
 {
    for( int i = 0; i < Size; i++ )
       data[ i ] = v;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline StaticArray< Size, Element >::StaticArray( const StaticArray< Size, Element >& v )
+inline StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value >& v )
 {
    for( int i = 0; i < Size; i++ )
       data[ i ] = v[ i ];
 }
 
-template< int Size, typename Element >
-String StaticArray< Size, Element >::getType()
+template< int Size, typename Value >
+String StaticArray< Size, Value >::getType()
 {
    return String( "Containers::StaticArray< " ) +
           String( Size ) +
           String( ", " ) +
-          TNL::getType< Element >() +
+          TNL::getType< Value >() +
           String( " >" );
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline int StaticArray< Size, Element >::getSize() const
+inline int StaticArray< Size, Value >::getSize() const
 {
    return size;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline Element* StaticArray< Size, Element >::getData()
+inline Value* StaticArray< Size, Value >::getData()
 {
    return data;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline const Element* StaticArray< Size, Element >::getData() const
+inline const Value* StaticArray< Size, Value >::getData() const
 {
    return data;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline const Element& StaticArray< Size, Element >::operator[]( int i ) const
+inline const Value& StaticArray< Size, Value >::operator[]( int i ) const
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline Element& StaticArray< Size, Element >::operator[]( int i )
+inline Value& StaticArray< Size, Value >::operator[]( int i )
 {
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline StaticArray< Size, Element >& StaticArray< Size, Element >::operator = ( const StaticArray< Size, Element >& array )
+inline StaticArray< Size, Value >& StaticArray< Size, Value >::operator = ( const StaticArray< Size, Value >& array )
 {
    for( int i = 0; i < size; i++ )
       data[ i ] = array[ i ];
    return *this;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
    template< typename Array >
 __cuda_callable__
-inline StaticArray< Size, Element >& StaticArray< Size, Element >::operator = ( const Array& array )
+inline StaticArray< Size, Value >& StaticArray< Size, Value >::operator = ( const Array& array )
 {
    for( int i = 0; i < size; i++ )
       data[ i ] = array[ i ];
    return *this;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
    template< typename Array >
 __cuda_callable__
-inline bool StaticArray< Size, Element >::operator == ( const Array& array ) const
+inline bool StaticArray< Size, Value >::operator == ( const Array& array ) const
 {
    if( ( int ) size != ( int ) Array::size )
       return false;
@@ -129,38 +129,38 @@ inline bool StaticArray< Size, Element >::operator == ( const Array& array ) con
    return true;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
    template< typename Array >
 __cuda_callable__
-inline bool StaticArray< Size, Element >::operator != ( const Array& array ) const
+inline bool StaticArray< Size, Value >::operator != ( const Array& array ) const
 {
    return ! this->operator == ( array );
 }
 
-template< int Size, typename Element >
-   template< typename OtherElement >
+template< int Size, typename Value >
+   template< typename OtherValue >
 __cuda_callable__
-StaticArray< Size, Element >::
-operator StaticArray< Size, OtherElement >() const
+StaticArray< Size, Value >::
+operator StaticArray< Size, OtherValue >() const
 {
-   StaticArray< Size, OtherElement > aux;
+   StaticArray< Size, OtherValue > aux;
    for( int i = 0; i < Size; i++ )
       aux[ i ] = data[ i ];
    return aux;
 }
 
-template< int Size, typename Element >
+template< int Size, typename Value >
 __cuda_callable__
-inline void StaticArray< Size, Element >::setValue( const ElementType& val )
+inline void StaticArray< Size, Value >::setValue( const ValueType& val )
 {
    for( int i = 0; i < Size; i++ )
       data[ i ] = val;
 }
 
-template< int Size, typename Element >
-bool StaticArray< Size, Element >::save( File& file ) const
+template< int Size, typename Value >
+bool StaticArray< Size, Value >::save( File& file ) const
 {
-   if( ! file. write< Element, Devices::Host, int >( data, size ) )
+   if( ! file. write< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to write " << getType() << "." << std::endl;
       return false;
@@ -168,10 +168,10 @@ bool StaticArray< Size, Element >::save( File& file ) const
    return true;
 }
 
-template< int Size, typename Element >
-bool StaticArray< Size, Element >::load( File& file)
+template< int Size, typename Value >
+bool StaticArray< Size, Value >::load( File& file)
 {
-   if( ! file.read< Element, Devices::Host, int >( data, size ) )
+   if( ! file.read< Value, Devices::Host, int >( data, size ) )
    {
       std::cerr << "Unable to read " << getType() << "." << std::endl;
       return false;
@@ -179,8 +179,8 @@ bool StaticArray< Size, Element >::load( File& file)
    return true;
 }
 
-template< int Size, typename Element >
-void StaticArray< Size, Element >::sort()
+template< int Size, typename Value >
+void StaticArray< Size, Value >::sort()
 {
    /****
     * We assume that the array data is small and so
@@ -192,8 +192,8 @@ void StaticArray< Size, Element >::sort()
             swap( data[ i ], data[ i+1 ] );
 }
 
-template< int Size, typename Element >
-std::ostream& StaticArray< Size, Element >::write( std::ostream& str, const char* separator ) const
+template< int Size, typename Value >
+std::ostream& StaticArray< Size, Value >::write( std::ostream& str, const char* separator ) const
 {
    for( int i = 0; i < Size - 1; i++ )
       str << data[ i ] << separator;
@@ -202,8 +202,8 @@ std::ostream& StaticArray< Size, Element >::write( std::ostream& str, const char
 }
 
 
-template< int Size, typename Element >
-std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Element >& a )
+template< int Size, typename Value >
+std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Value >& a )
 {
    str << "[ ";
    a.write( str, ", " );
@@ -211,26 +211,5 @@ std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Element
    return str;
 };
 
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-// TODO: it does not work with CUDA
-
-#ifndef HAVE_CUDA
-extern template class StaticArray< 4, char >;
-extern template class StaticArray< 4, int >;
-#ifdef INSTANTIATE_LONG_INT
-extern template class StaticArray< 4, long int >;
-#endif
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticArray< 4, float >;
-#endif
-extern template class StaticArray< 4, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticArray< 4, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h
index f009b52afe4f7d630ad11c6324e5b5fe291dd71c..8045d12ac680c802bacb959842a0bc2b0ffe22e2 100644
--- a/src/TNL/Containers/StaticVector1D_impl.h
+++ b/src/TNL/Containers/StaticVector1D_impl.h
@@ -183,20 +183,5 @@ StaticVector< 1, Real >::lpNorm( const Real& p ) const
    return TNL::abs( this->data[ 0 ] );
 }
 
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifndef HAVE_CUDA
-// TODO: does not work with CUDA
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticVector< 1, float >;
-#endif
-extern template class StaticVector< 1, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticVector< 1, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h
index fdcc28aa51efcf1e3bf1da787d0957bf64a39db1..8691508b81557b6ce448ca24d65df2b2089bc035 100644
--- a/src/TNL/Containers/StaticVector2D_impl.h
+++ b/src/TNL/Containers/StaticVector2D_impl.h
@@ -212,21 +212,5 @@ StaticVector< 2, Real >::lpNorm( const Real& p ) const
                     TNL::pow( TNL::abs( this->data[ 1 ] ), p ), 1.0 / p ); 
 }
 
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifndef HAVE_CUDA
-// TODO: does not work with CUDA
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticVector< 2, float >;
-#endif
-extern template class StaticVector< 2, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticVector< 2, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
-
diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h
index 655ed87365b1b53cd45d4c33805108ccc155cba5..1e2a44cfbc29caa97f12fb3e3b0d0b417cb31458 100644
--- a/src/TNL/Containers/StaticVector3D_impl.h
+++ b/src/TNL/Containers/StaticVector3D_impl.h
@@ -231,22 +231,5 @@ StaticVector< 3, Real >::lpNorm( const Real& p ) const
                     TNL::pow( TNL::abs( this->data[ 2 ] ), p ), 1.0 / p ); 
 }
 
-
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifndef HAVE_CUDA
-// TODO: does not work with CUDA
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticVector< 3, float >;
-#endif
-extern template class StaticVector< 3, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticVector< 3, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticVector_impl.cpp b/src/TNL/Containers/StaticVector_impl.cpp
deleted file mode 100644
index e503d09669d8eb3d2fd0fe606bbda98aafb8bb2e..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/StaticVector_impl.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/***************************************************************************
-                          StaticVector_impl.cpp  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/StaticVector.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifndef HAVE_CUDA
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 1, float >;
-#endif
-template class StaticVector< 1, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 1, long double >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 2, float >;
-#endif
-template class StaticVector< 2, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 2, long double >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 3, float >;
-#endif
-template class StaticVector< 3, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 3, long double >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 4, float >;
-#endif
-template class StaticVector< 4, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 4, long double >;
-#endif
-
-#endif
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/StaticVector_impl.cu b/src/TNL/Containers/StaticVector_impl.cu
deleted file mode 100644
index ca460d353a40cc7d8efcfa1def62a5d4fc72e5b9..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/StaticVector_impl.cu
+++ /dev/null
@@ -1,55 +0,0 @@
-/***************************************************************************
-                          StaticVector_impl.cu  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/StaticVector.h>
-
-namespace TNL {
-namespace Vectors {
-
-#ifdef HAVE_CUDA
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 1, float >;
-#endif
-template class StaticVector< 1, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 1, long double >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 2, float >;
-#endif
-template class StaticVector< 2, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 2, long double >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 3, float >;
-#endif
-template class StaticVector< 3, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 3, long double >;
-#endif
-
-#ifdef INSTANTIATE_FLOAT
-template class StaticVector< 4, float >;
-#endif
-template class StaticVector< 4, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class StaticVector< 4, long double >;
-#endif
-
-#endif
-#endif
-
-} // namespace Vectors
-} // namespace TNL
diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h
index 5f28e277af22fb6bdd6beab0a58d8a158359c38c..bfb473c92f8440ad941869a39ec933d85025f27c 100644
--- a/src/TNL/Containers/StaticVector_impl.h
+++ b/src/TNL/Containers/StaticVector_impl.h
@@ -236,20 +236,5 @@ StaticVector< Size, Real > operator * ( const Scalar& c, const StaticVector< Siz
    return u * c;
 }
 
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifndef HAVE_CUDA
-// TODO: does not work with CUDA
-#ifdef INSTANTIATE_FLOAT
-extern template class StaticVector< 4, float >;
-#endif
-extern template class StaticVector< 4, double >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class StaticVector< 4, long double >;
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h
index 6ab6fc1fb9abef96f29295b02bcdceb13b539b63..72029b194992391e3edb35f5e9747b5a74222b2f 100644
--- a/src/TNL/Containers/Vector.h
+++ b/src/TNL/Containers/Vector.h
@@ -13,12 +13,13 @@
 #include <TNL/Containers/Array.h>
 
 namespace TNL {
-namespace Containers {   
+namespace Containers {
 
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int >
-class Vector : public Containers::Array< Real, Device, Index >
+class Vector
+: public Array< Real, Device, Index >
 {
    public:
 
@@ -28,9 +29,9 @@ class Vector : public Containers::Array< Real, Device, Index >
    typedef Vector< Real, TNL::Devices::Host, Index > HostType;
    typedef Vector< Real, TNL::Devices::Cuda, Index > CudaType;
 
-   Vector();
-
-   Vector( const Index size );
+   // inherit all constructors and assignment operators from Array
+   using Array< Real, Device, Index >::Array;
+   using Array< Real, Device, Index >::operator=;
 
    static String getType();
 
@@ -47,26 +48,15 @@ class Vector : public Containers::Array< Real, Device, Index >
                     const RealType& value,
                     const RealType& thisElementMultiplicator );
 
-   Vector< Real, Device, Index >& operator = ( const Vector< Real, Device, Index >& array );
-
-   template< typename VectorT >
-   Vector< Real, Device, Index >& operator = ( const VectorT& vector );
-
    template< typename VectorT >
-   bool operator == ( const VectorT& vector ) const;
+   Vector& operator -= ( const VectorT& vector );
 
    template< typename VectorT >
-   bool operator != ( const VectorT& vector ) const;
+   Vector& operator += ( const VectorT& vector );
 
-   template< typename VectorT >
-   Vector< Real, Device, Index >& operator -= ( const VectorT& vector );
-
-   template< typename VectorT >
-   Vector< Real, Device, Index >& operator += ( const VectorT& vector );
+   Vector& operator *= ( const RealType& c );
 
-   Vector< Real, Device, Index >& operator *= ( const RealType& c );
- 
-   Vector< Real, Device, Index >& operator /= ( const RealType& c );
+   Vector& operator /= ( const RealType& c );
 
    Real max() const;
 
@@ -112,7 +102,6 @@ class Vector : public Containers::Array< Real, Device, Index >
                    const Real& multiplicator = 1.0,
                    const Real& thisMultiplicator = 1.0 );
 
-
    //! Computes this = thisMultiplicator * this + multiplicator1 * v1 + multiplicator2 * v2.
    template< typename Vector >
    void addVectors( const Vector& v1,
diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h
new file mode 100644
index 0000000000000000000000000000000000000000..c53a165d113d96e988129146cc6bdf4c0d27503a
--- /dev/null
+++ b/src/TNL/Containers/VectorView.h
@@ -0,0 +1,140 @@
+/***************************************************************************
+                          VectorView.h  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#include <TNL/Containers/ArrayView.h>
+
+namespace TNL {
+namespace Containers {
+
+template< typename Real, typename Device, typename Index >
+class Vector;
+
+template< int Size, typename Real >
+class StaticVector;
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int >
+class VectorView
+: public ArrayView< Real, Device, Index >
+{
+   using BaseType = ArrayView< Real, Device, Index >;
+   using NonConstReal = typename std::remove_const< Real >::type;
+public:
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using HostType = VectorView< Real, Devices::Host, Index >;
+   using CudaType = VectorView< Real, Devices::Cuda, Index >;
+
+   // inherit all ArrayView's constructors
+#ifndef __NVCC__
+   using BaseType::ArrayView;
+#else
+   // workaround for a bug in nvcc 8.0 (seems to be fixed in 9.0)
+   using ArrayView< Real, Device, Index >::ArrayView;
+#endif
+
+   // initialization by base class is not a copy constructor so it has to be explicit
+   template< typename Element_ >  // template catches both const and non-const qualified Element
+   __cuda_callable__
+   VectorView( const ArrayView< Element_, Device, Index >& view )
+   : BaseType::ArrayView( view ) {}
+
+
+   static String getType();
+
+
+   // All other Vector methods follow...
+   void addElement( IndexType i, RealType value );
+
+   void addElement( IndexType i,
+                    RealType value,
+                    RealType thisElementMultiplicator );
+
+   template< typename Vector >
+   VectorView& operator-=( const Vector& vector );
+
+   template< typename Vector >
+   VectorView& operator+=( const Vector& vector );
+
+   VectorView& operator*=( RealType c );
+
+   VectorView& operator/=( RealType c );
+
+   NonConstReal max() const;
+
+   NonConstReal min() const;
+
+   NonConstReal absMax() const;
+
+   NonConstReal absMin() const;
+
+   template< typename ResultType = NonConstReal, typename Real_ >
+   ResultType lpNorm( Real_ p ) const;
+
+   template< typename ResultType = NonConstReal >
+   ResultType sum() const;
+
+   template< typename Vector >
+   NonConstReal differenceMax( const Vector& v ) const;
+
+   template< typename Vector >
+   NonConstReal differenceMin( const Vector& v ) const;
+
+   template< typename Vector >
+   NonConstReal differenceAbsMax( const Vector& v ) const;
+
+   template< typename Vector >
+   NonConstReal differenceAbsMin( const Vector& v ) const;
+
+   template< typename ResultType = NonConstReal, typename Vector, typename Real_ >
+   ResultType differenceLpNorm( const Vector& v, Real_ p ) const;
+
+   template< typename ResultType = NonConstReal, typename Vector >
+   ResultType differenceSum( const Vector& v ) const;
+
+   void scalarMultiplication( Real alpha );
+
+   //! Computes scalar dot product
+   template< typename Vector >
+   NonConstReal scalarProduct( const Vector& v );
+
+   //! Computes this = thisMultiplicator * this + alpha * x.
+   template< typename Vector >
+   void addVector( const Vector& x,
+                   Real alpha = 1.0,
+                   Real thisMultiplicator = 1.0 );
+
+   //! Computes this = thisMultiplicator * this + multiplicator1 * v1 + multiplicator2 * v2.
+   template< typename Vector >
+   void addVectors( const Vector& v1,
+                    Real multiplicator1,
+                    const Vector& v2,
+                    Real multiplicator2,
+                    Real thisMultiplicator = 1.0 );
+
+   void computePrefixSum();
+
+   void computePrefixSum( IndexType begin, IndexType end );
+
+   void computeExclusivePrefixSum();
+
+   void computeExclusivePrefixSum( IndexType begin, IndexType end );
+};
+
+} // namespace Containers
+} // namespace TNL
+
+#include <TNL/Containers/VectorView_impl.h>
diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e120b32c10a74d3f331be4b51a080e3efb19348
--- /dev/null
+++ b/src/TNL/Containers/VectorView_impl.h
@@ -0,0 +1,318 @@
+/***************************************************************************
+                          VectorView_impl.h  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Containers/Algorithms/VectorOperations.h>
+
+namespace TNL {
+namespace Containers {
+
+template< typename Real, typename Device, typename Index >
+String
+VectorView< Real, Device, Index >::
+getType()
+{
+   // Builds "Containers::VectorView< R, D, I >", where R and I come from
+   // TNL::getType and D from Device::getDeviceType.
+   return String( "Containers::VectorView< " )
+      + TNL::getType< Real >() + ", "
+      + Device::getDeviceType() + ", "
+      + TNL::getType< Index >() + " >";
+}
+
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+addElement( IndexType i, RealType value )
+{
+   // Two-argument form: no multiplicator for the current element is passed on.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::addElement( *this, i, value );
+}
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+addElement( IndexType i, RealType value, RealType thisElementMultiplicator )
+{
+   // Three-argument form: thisElementMultiplicator is forwarded unchanged.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::addElement( *this, i, value, thisElementMultiplicator );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+VectorView< Real, Device, Index >&
+VectorView< Real, Device, Index >::
+operator-=( const Vector& vector )
+{
+   // Subtraction is expressed as addVector with alpha = -1.0;
+   // thisMultiplicator keeps its declared default.
+   this->addVector( vector, -1.0 );
+   return *this;
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+VectorView< Real, Device, Index >&
+VectorView< Real, Device, Index >::
+operator+=( const Vector& vector )
+{
+   // Both multiplicators of addVector are left at their declared
+   // defaults.
+   this->addVector( vector );
+   return *this;
+}
+
+template< typename Real, typename Device, typename Index >
+VectorView< Real, Device, Index >&
+VectorView< Real, Device, Index >::
+operator*=( RealType c )
+{
+   // Hands *this and c to vectorScalarMultiplication, then returns *this.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::vectorScalarMultiplication( *this, c );
+   return *this;
+}
+
+template< typename Real, typename Device, typename Index >
+VectorView< Real, Device, Index >&
+VectorView< Real, Device, Index >::
+operator/=( RealType c )
+{
+   // Division is expressed as multiplication by the reciprocal 1.0 / c.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::vectorScalarMultiplication( *this, 1.0 / c );
+   return *this;
+}
+
+template< typename Real, typename Device, typename Index >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+max() const
+{
+   // Delegates to getVectorMax with NonConstReal as the result type.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorMax< VectorView, NonConstReal >( *this );
+}
+
+template< typename Real, typename Device, typename Index >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+min() const
+{
+   // Delegates to getVectorMin with NonConstReal as the result type.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorMin< VectorView, NonConstReal >( *this );
+}
+
+
+template< typename Real, typename Device, typename Index >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+absMax() const
+{
+   // Delegates to getVectorAbsMax with NonConstReal as the result type.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorAbsMax< VectorView, NonConstReal >( *this );
+}
+
+template< typename Real, typename Device, typename Index >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+absMin() const
+{
+   // Delegates to getVectorAbsMin with NonConstReal as the result type.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorAbsMin< VectorView, NonConstReal >( *this );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename ResultType, typename Real_ >
+ResultType
+VectorView< Real, Device, Index >::
+lpNorm( const Real_ p ) const
+{
+   // The caller-selected ResultType is passed on to getVectorLpNorm.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorLpNorm< VectorView, ResultType >( *this, p );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename ResultType >
+ResultType
+VectorView< Real, Device, Index >::
+sum() const
+{
+   // The caller-selected ResultType is passed on to getVectorSum.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorSum< VectorView, ResultType >( *this );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+differenceMax( const Vector& v ) const
+{
+   // Delegates to getVectorDifferenceMax on ( *this, v ); result is NonConstReal.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorDifferenceMax< VectorView, Vector, NonConstReal >( *this, v );
+}
+
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::differenceMin( const Vector& v ) const
+{
+   // Delegates to getVectorDifferenceMin on ( *this, v ); result is NonConstReal.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorDifferenceMin< VectorView, Vector, NonConstReal >( *this, v );
+}
+
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+differenceAbsMax( const Vector& v ) const
+{
+   // Delegates to getVectorDifferenceAbsMax on ( *this, v ); result is NonConstReal.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorDifferenceAbsMax< VectorView, Vector, NonConstReal >( *this, v );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+differenceAbsMin( const Vector& v ) const
+{
+   // Delegates to getVectorDifferenceAbsMin on ( *this, v ); result is NonConstReal.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorDifferenceAbsMin< VectorView, Vector, NonConstReal >( *this, v );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename ResultType, typename Vector, typename Real_ >
+ResultType
+VectorView< Real, Device, Index >::
+differenceLpNorm( const Vector& v, const Real_ p ) const
+{
+   // The caller-selected ResultType is passed on to getVectorDifferenceLpNorm.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorDifferenceLpNorm< VectorView, Vector, ResultType >( *this, v, p );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename ResultType, typename Vector >
+ResultType
+VectorView< Real, Device, Index >::
+differenceSum( const Vector& v ) const
+{
+   // The caller-selected ResultType is passed on to getVectorDifferenceSum.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getVectorDifferenceSum< VectorView, Vector, ResultType >( *this, v );
+}
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+scalarMultiplication( Real alpha )
+{
+   // Same backend call as operator*=, but without returning *this.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::vectorScalarMultiplication( *this, alpha );
+}
+
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+typename VectorView< Real, Device, Index >::NonConstReal
+VectorView< Real, Device, Index >::
+scalarProduct( const Vector& v )
+{
+   // NOTE(review): not const-qualified, unlike the other reductions in this file.
+   using Operations = Algorithms::VectorOperations< Device >;
+   return Operations::template getScalarProduct< VectorView, Vector, NonConstReal >( *this, v );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+void
+VectorView< Real, Device, Index >::
+addVector( const Vector& x, Real alpha, Real thisMultiplicator )
+{
+   // All three arguments are forwarded, together with *this, to the backend.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::addVector( *this, x, alpha, thisMultiplicator );
+}
+
+template< typename Real, typename Device, typename Index >
+   template< typename Vector >
+void
+VectorView< Real, Device, Index >::
+addVectors( const Vector& v1,
+            Real multiplicator1,
+            const Vector& v2,
+            Real multiplicator2,
+            Real thisMultiplicator )
+{
+   // Arguments are forwarded in declaration order, after *this.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::addVectors( *this, v1, multiplicator1, v2, multiplicator2, thisMultiplicator );
+}
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+computePrefixSum()
+{
+   // Whole-view variant: passes 0 and getSize() as begin and end.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::computePrefixSum( *this, 0, this->getSize() );
+}
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+computePrefixSum( IndexType begin, IndexType end )
+{
+   // Range variant: begin and end are forwarded as given.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::computePrefixSum( *this, begin, end );
+}
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+computeExclusivePrefixSum()
+{
+   // Whole-view variant: passes 0 and getSize() as begin and end.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::computeExclusivePrefixSum( *this, 0, this->getSize() );
+}
+
+template< typename Real, typename Device, typename Index >
+void
+VectorView< Real, Device, Index >::
+computeExclusivePrefixSum( IndexType begin, IndexType end )
+{
+   // Range variant: begin and end are forwarded as given.
+   using Operations = Algorithms::VectorOperations< Device >;
+   Operations::computeExclusivePrefixSum( *this, begin, end );
+}
+
+} // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Vector_impl.cpp b/src/TNL/Containers/Vector_impl.cpp
deleted file mode 100644
index 6fe0deff839453394ff5c806ad3e9766dfa705d8..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Vector_impl.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/***************************************************************************
-                          Vector_impl.cpp  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Vector.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-template class Vector< float, Devices::Host, int >;
-template Vector< float, Devices::Host, int >& Vector< float, Devices::Host, int >:: operator = ( const Vector< double, Devices::Host, int >& vector );
-#endif
-
-
-template class Vector< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Vector< long double, Devices::Host, int >;
-#endif
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class Vector< float, Devices::Host, long int >;
-#endif
-template class Vector< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Vector< long double, Devices::Host, long int >;
-#endif
-#endif
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
-
diff --git a/src/TNL/Containers/Vector_impl.cu b/src/TNL/Containers/Vector_impl.cu
deleted file mode 100644
index 7a1a411350bbaf60393d1d29f29255ed07b59e82..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Vector_impl.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/***************************************************************************
-                          Vector_impl.cu  -  description
-                             -------------------
-    begin                : Jan 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Vector.h>
-
-namespace TNL {
-namespace Vectors {
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-template class Vector< float, Devices::Cuda, int >;
-#endif
-template class Vector< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Vector< long double, Devices::Cuda, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-template class Vector< float, Devices::Cuda, long int >;
-#endif
-template class Vector< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-template class Vector< long double, Devices::Cuda, long int >;
-#endif
-#endif
-#endif
-
-#endif
-
-} // namespace Vectors
-} // namespace TNL
diff --git a/src/TNL/Containers/Vector_impl.h b/src/TNL/Containers/Vector_impl.h
index 05ce0a589d7df8feea465b63f2758797fb28b0f9..dacb23e591e823afa2097bb771c6be4d7cba781c 100644
--- a/src/TNL/Containers/Vector_impl.h
+++ b/src/TNL/Containers/Vector_impl.h
@@ -14,25 +14,7 @@
 #include <TNL/Containers/Algorithms/VectorOperations.h>
 
 namespace TNL {
-namespace Containers {   
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Vector< Real, Device, Index >::
-Vector()
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Vector< Real, Device, Index >::
-Vector( const Index size )
-{
-   this->setSize( size );
-}
-
+namespace Containers {
 
 template< typename Real,
           typename Device,
@@ -100,51 +82,6 @@ addElement( const IndexType i,
    Algorithms::VectorOperations< Device >::addElement( *this, i, value, thisElementMultiplicator );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-Vector< Real, Device, Index >&
-Vector< Real, Device, Index >::
-operator = ( const Vector< Real, Device, Index >& vector )
-{
-   Containers::Array< Real, Device, Index >::operator = ( vector );
-   return ( *this );
-};
-
-template< typename Real,
-           typename Device,
-           typename Index >
-   template< typename VectorT >
-Vector< Real, Device, Index >&
-Vector< Real, Device, Index >::
-operator = ( const VectorT& vector )
-{
-   Containers::Array< Real, Device, Index >::operator = ( vector );
-   return ( *this );
-};
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename VectorT >
-bool
-Vector< Real, Device, Index >::
-operator == ( const VectorT& vector ) const
-{
-   return Containers::Array< Real, Device, Index >::operator == ( vector );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename VectorT >
-bool
-Vector< Real, Device, Index >::
-operator != ( const VectorT& vector ) const
-{
-   return Containers::Array< Real, Device, Index >::operator != ( vector );
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -387,50 +324,5 @@ computeExclusivePrefixSum( const IndexType begin,
    Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end );
 }
 
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-#ifdef INSTANTIATE_FLOAT
-extern template class Vector< float, Devices::Host, int >;
-extern template Vector< float, Devices::Host, int >& Vector< float, Devices::Host, int >:: operator = ( const Vector< double, Devices::Host, int >& vector );
-#endif
-
-extern template class Vector< double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class Vector< long double, Devices::Host, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class Vector< float, Devices::Host, long int >;
-#endif
-extern template class Vector< double, Devices::Host, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class Vector< long double, Devices::Host, long int >;
-#endif
-#endif
-
-#ifdef HAVE_CUDA
-#ifdef INSTANTIATE_FLOAT
-extern template class Vector< float, Devices::Cuda, int >;
-#endif
-extern template class Vector< double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class Vector< long double, Devices::Cuda, int >;
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-#ifdef INSTANTIATE_FLOAT
-extern template class Vector< float, Devices::Cuda, long int >;
-#endif
-extern template class Vector< double, Devices::Cuda, long int >;
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template class Vector< long double, Devices::Cuda, long int >;
-#endif
-#endif
-#endif
-
-#endif
-
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/CudaStreamPool.h b/src/TNL/CudaStreamPool.h
index aa0a3e5d1e2ddee0d4416040ae9815f5242955f6..1dd2b7907fe39b53e331b0147fff1cabe16424ef 100644
--- a/src/TNL/CudaStreamPool.h
+++ b/src/TNL/CudaStreamPool.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          UniquePointer.h  -  description
+                          CudaStreamPool.h  -  description
                              -------------------
     begin                : Oct 14, 2016
     copyright            : (C) 2016 by Tomas Oberhuber et al.
diff --git a/src/TNL/Devices/Cuda.cpp b/src/TNL/Devices/Cuda.cpp
index 2c8f85aeca4c1fc27a76c14115c7e29cda8c4251..1bc85e3c829ff4a2f44518f4016c7ef90a447b27 100644
--- a/src/TNL/Devices/Cuda.cpp
+++ b/src/TNL/Devices/Cuda.cpp
@@ -78,12 +78,16 @@ void Cuda::removeSmartPointer( SmartPointer* pointer )
 
 bool Cuda::synchronizeDevice( int deviceId )
 {
+#ifdef HAVE_CUDA_UNIFIED_MEMORY
+   return true;  // smart-pointer register is not synchronized in this build; presumably unified memory makes it unnecessary -- TODO confirm
+#else
    if( deviceId < 0 )
       deviceId = Devices::CudaDeviceInfo::getActiveDevice();
    smartPointersSynchronizationTimer.start();
    bool b = smartPointersRegister.synchronizeDevice( deviceId );
    smartPointersSynchronizationTimer.stop();
    return b;
+#endif  // HAVE_CUDA_UNIFIED_MEMORY
 }
 
 } // namespace Devices
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index 1baa50b1e7ee38a3e6a885bfcd2c6da01d09f39c..c73e327e9ac84ab10752a66e783a46da1b288c72 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -14,7 +14,7 @@
 #include <unistd.h>
 #include <TNL/String.h>
 #include <TNL/Assert.h>
-#include <TNL/SmartPointersRegister.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
 #include <TNL/Timer.h>
 #include <TNL/Devices/CudaCallable.h>
 
diff --git a/src/TNL/Devices/MIC.h b/src/TNL/Devices/MIC.h
index 36678c0d27da1c873ee4bf0da2e71616c012d2bc..776b7c36fe7a44fd0063142708fd0789f350c45c 100644
--- a/src/TNL/Devices/MIC.h
+++ b/src/TNL/Devices/MIC.h
@@ -17,7 +17,7 @@
 #include <unistd.h>
 #include <TNL/String.h>
 #include <TNL/Assert.h>
-#include <TNL/SmartPointersRegister.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
 #include <TNL/Timer.h>
 
 #include <TNL/Devices/CudaCallable.h>
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h
index e829cc64f398dafcd3d6b890e9fe7756b94b4676..f8f9187fa514cc9d836bc8072a7adbfddd5f8216 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <TNL/Solvers/BuildConfigTags.h>
+#include <TNL/Meshes/BuildConfigTags.h>
 
 namespace TNL {
 
@@ -22,7 +23,7 @@ namespace Solvers {
 /****
  * Turn off support for float and long double.
  */
-//template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, float > { enum { enabled = false }; };
+template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, float > { enum { enabled = false }; };
 template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, long double > { enum { enabled = false }; };
 
 /****
@@ -54,4 +55,22 @@ template<> struct ConfigTagTimeDiscretisation< HamiltonJacobiBuildConfig, Implic
 //template<> struct ConfigTagExplicitSolver< HamiltonJacobiBuildConfig, ExplicitEulerSolverTag >{ enum { enabled = false }; };
 
 } // namespace Solvers
+
+namespace Meshes {
+namespace BuildConfigTags {
+
+/****
+ * Mark the float and long double GridRealTag specializations as disabled for HamiltonJacobiBuildConfig.
+ */
+template<> struct GridRealTag< HamiltonJacobiBuildConfig, float > { enum { enabled = false }; };
+template<> struct GridRealTag< HamiltonJacobiBuildConfig, long double > { enum { enabled = false }; };
+
+/****
+ * Mark the short int and long int GridIndexTag specializations as disabled for HamiltonJacobiBuildConfig.
+ */
+template<> struct GridIndexTag< HamiltonJacobiBuildConfig, short int >{ enum { enabled = false }; };
+template<> struct GridIndexTag< HamiltonJacobiBuildConfig, long int >{ enum { enabled = false }; };
+
+} // namespace BuildConfigTags
+} // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
index a406a5b1772033bed2da5ab17330c892119923dc..b981a92a8cb5c7d495736ab1c12cb4b891167bee 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
@@ -31,8 +31,8 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >
       typedef Index IndexType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType;
-      using MeshFunctionPointer = SharedPointer< MeshFunctionType >;
-      using InterfaceMapPointer = SharedPointer< InterfaceMapType >;
+      using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
+      using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;
       
       void initInterface( const MeshFunctionPointer& input,
                           MeshFunctionPointer& output,
@@ -61,8 +61,8 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
       typedef Index IndexType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType;
-      using MeshFunctionPointer = SharedPointer< MeshFunctionType >;
-      using InterfaceMapPointer = SharedPointer< InterfaceMapType >;      
+      using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
+      using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;      
 
       void initInterface( const MeshFunctionPointer& input,
                           MeshFunctionPointer& output,
@@ -90,8 +90,8 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
       typedef Index IndexType;
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType;
-      using MeshFunctionPointer = SharedPointer< MeshFunctionType >;
-      using InterfaceMapPointer = SharedPointer< InterfaceMapType >;      
+      using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
+      using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;      
 
       void initInterface( const MeshFunctionPointer& input,
                           MeshFunctionPointer& output,
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem.h
index f7e1f1d386b061937417c0906590290f5a6703fa..0dde93802513e8cd3ca576b35cb8093d0d54b070 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem.h
@@ -15,7 +15,7 @@
 
 #include <TNL/Problems/PDEProblem.h>
 #include <TNL/Functions/MeshFunction.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include "tnlFastSweepingMethod.h"
 
 template< typename Mesh,
@@ -38,13 +38,13 @@ class tnlDirectEikonalProblem
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
       typedef Problems::PDEProblem< Mesh, Communicator, RealType, DeviceType, IndexType > BaseType;
       using AnisotropyType = Anisotropy;
-      using AnisotropyPointer = SharedPointer< AnisotropyType, DeviceType >;
-      using MeshFunctionPointer = SharedPointer< MeshFunctionType >;
+      using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >;
+      using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
 
       using typename BaseType::MeshType;
       using typename BaseType::DofVectorType;
-      using MeshPointer = SharedPointer< MeshType >;
-      using DofVectorPointer = SharedPointer< DofVectorType >;
+      using MeshPointer = Pointers::SharedPointer< MeshType >;
+      using DofVectorPointer = Pointers::SharedPointer< DofVectorType >;
       
       static constexpr bool isTimeDependent() { return false; };
 
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h
index 54e577dacde239171ae299ee62b078bd7a406c35..fa807742735f8beaa034b9a2555d3bc4a57f9f8e 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/Analytic/Constant.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include "tnlDirectEikonalMethodsBase.h"
 
 
@@ -39,8 +39,9 @@ class FastSweepingMethod< Meshes::Grid< 1, Real, Device, Index >, Anisotropy >
       typedef Index IndexType;
       typedef Anisotropy AnisotropyType;
       typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType;
-      using MeshPointer = SharedPointer< MeshType >;
-      using AnisotropyPointer = SharedPointer< AnisotropyType, DeviceType >;
+      using MeshPointer = Pointers::SharedPointer<  MeshType >;
+      using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >;
+      
       
       using typename BaseType::InterfaceMapType;
       using typename BaseType::MeshFunctionType;
@@ -81,8 +82,8 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >
       typedef Index IndexType;
       typedef Anisotropy AnisotropyType;
       typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType;
-      using MeshPointer = SharedPointer< MeshType >;
-      using AnisotropyPointer = SharedPointer< AnisotropyType, DeviceType >;
+      using MeshPointer = Pointers::SharedPointer<  MeshType >;
+      using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >;
 
       using typename BaseType::InterfaceMapType;
       using typename BaseType::MeshFunctionType;
@@ -121,8 +122,8 @@ class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >
       typedef Index IndexType;
       typedef Anisotropy AnisotropyType;
       typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType;
-      using MeshPointer = SharedPointer< MeshType >;
-      using AnisotropyPointer = SharedPointer< AnisotropyType, DeviceType >;
+      using MeshPointer = Pointers::SharedPointer<  MeshType >;
+      using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >;
       
       using typename BaseType::InterfaceMapType;
       using typename BaseType::MeshFunctionType;
diff --git a/src/TNL/Functions/MeshFunction.h b/src/TNL/Functions/MeshFunction.h
index 41765c729a3f13fa4b3b7a9f720256b9cbdf83d5..4ccdab9f312433d262e654eb4686df45216406fc 100644
--- a/src/TNL/Functions/MeshFunction.h
+++ b/src/TNL/Functions/MeshFunction.h
@@ -14,7 +14,7 @@
 #include <TNL/Functions/Domain.h>
 #include <TNL/Functions/MeshFunctionGnuplotWriter.h>
 #include <TNL/Functions/MeshFunctionVTKWriter.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h>
 
@@ -36,7 +36,7 @@ class MeshFunction :
       using MeshType = Mesh;
       using DeviceType = typename MeshType::DeviceType;
       using IndexType = typename MeshType::GlobalIndexType;
-      using MeshPointer = SharedPointer< MeshType >;      
+      using MeshPointer = Pointers::SharedPointer< MeshType >;      
       using RealType = Real;
       using VectorType = Containers::Vector< RealType, DeviceType, IndexType >;
       using ThisType = Functions::MeshFunction< MeshType, MeshEntityDimension, RealType >;
@@ -61,7 +61,7 @@ class MeshFunction :
       
       template< typename Vector >
       MeshFunction( const MeshPointer& meshPointer,
-                    SharedPointer< Vector >& data,
+                    Pointers::SharedPointer<  Vector >& data,
                     const IndexType& offset = 0 );      
  
       static String getType();
@@ -80,6 +80,10 @@ class MeshFunction :
                   const String& prefix = "" );
  
       void bind( ThisType& meshFunction );
+      
+      template< typename Vector >
+      void bind( const Vector& data,
+                 const IndexType& offset = 0 );
  
       template< typename Vector >
       void bind( const MeshPointer& meshPointer,
@@ -88,7 +92,7 @@ class MeshFunction :
       
       template< typename Vector >
       void bind( const MeshPointer& meshPointer,
-                 const SharedPointer< Vector >& dataPtr,
+                 const Pointers::SharedPointer<  Vector >& dataPtr,
                  const IndexType& offset = 0 );
       
       void setMesh( const MeshPointer& meshPointer );
@@ -161,8 +165,11 @@ class MeshFunction :
  
       using Object::boundLoad;
 
-      template< typename CommunicatorType>
-      void synchronize( bool withPeriodicBoundaryConditions = false );
+      template< typename CommunicatorType,
+                typename PeriodicBoundariesMaskType = MeshFunction< Mesh, MeshEntityDimension, bool > >
+      void synchronize( bool withPeriodicBoundaryConditions = false,
+                        const Pointers::SharedPointer< PeriodicBoundariesMaskType, DeviceType >& mask =
+                           Pointers::SharedPointer< PeriodicBoundariesMaskType, DeviceType >( nullptr ) );
 
  
    protected:
@@ -181,6 +188,11 @@ class MeshFunction :
    
 };
 
+template< typename Mesh,
+          int MeshEntityDimension,
+          typename Real >
+std::ostream& operator << ( std::ostream& str, const MeshFunction< Mesh, MeshEntityDimension, Real >& f );
+
 } // namespace Functions
 } // namespace TNL
 
diff --git a/src/TNL/Functions/MeshFunctionEvaluator_impl.h b/src/TNL/Functions/MeshFunctionEvaluator_impl.h
index 540ae7077b73baab728c215c425b6bedfc478f1e..48eb961785fb2dab2c51f7fff8ea6f5fbda07a70 100644
--- a/src/TNL/Functions/MeshFunctionEvaluator_impl.h
+++ b/src/TNL/Functions/MeshFunctionEvaluator_impl.h
@@ -119,10 +119,8 @@ evaluateEntities( OutMeshFunctionPointer& meshFunction,
    typedef Functions::MeshFunctionEvaluatorAssignmentEntitiesProcessor< MeshType, TraverserUserData > AssignmentEntitiesProcessor;
    typedef Functions::MeshFunctionEvaluatorAdditionEntitiesProcessor< MeshType, TraverserUserData > AdditionEntitiesProcessor;
    //typedef typename OutMeshFunction::MeshPointer OutMeshPointer;
-   typedef SharedPointer< TraverserUserData, DeviceType > TraverserUserDataPointer;
    
-   SharedPointer< TraverserUserData, DeviceType >
-      userData( &function.template getData< DeviceType >(),
+   TraverserUserData userData( &function.template getData< DeviceType >(),
                 time,
                 &meshFunction.template modifyData< DeviceType >(),
                 outFunctionMultiplicator,
diff --git a/src/TNL/Functions/MeshFunction_impl.h b/src/TNL/Functions/MeshFunction_impl.h
index 8247ed19a0c58a0b52e8877cf2708a2862517cd2..3a8c74b6785b68357707ff9e7349a42a67e07f5f 100644
--- a/src/TNL/Functions/MeshFunction_impl.h
+++ b/src/TNL/Functions/MeshFunction_impl.h
@@ -9,7 +9,7 @@
 /* See Copyright Notice in tnl/Copyright */
 
 #include <TNL/Assert.h>
-#include <TNL/DevicePointer.h>
+#include <TNL/Pointers/DevicePointer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/MeshFunctionEvaluator.h>
 #include <TNL/Functions/MeshFunctionNormGetter.h>
@@ -40,9 +40,6 @@ MeshFunction( const MeshPointer& meshPointer )
 
    this->meshPointer=meshPointer;
    this->data.setSize( getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
-               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );
 }
 
 template< typename Mesh,
@@ -69,7 +66,7 @@ MeshFunction( const MeshPointer& meshPointer,
 //: meshPointer( meshPointer )
 {
    TNL_ASSERT_GE( data.getSize(), meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(),
-                  "The input vector is not large enough for binding to the mesh function." );      
+                  "The input vector is not large enough for binding to the mesh function." );
     setupSynchronizer(meshPointer->getDistributedMesh());
 
    this->meshPointer=meshPointer;
@@ -83,12 +80,12 @@ template< typename Mesh,
    template< typename Vector >
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 MeshFunction( const MeshPointer& meshPointer,
-              SharedPointer< Vector >& data,
+              Pointers::SharedPointer<  Vector >& data,
               const IndexType& offset )
 //: meshPointer( meshPointer )
 {
    TNL_ASSERT_GE( data->getSize(), offset + meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(),
-                  "The input vector is not large enough for binding to the mesh function." );      
+                  "The input vector is not large enough for binding to the mesh function." );
 
     setupSynchronizer(meshPointer->getDistributedMesh());
 
@@ -194,6 +191,20 @@ bind( ThisType& meshFunction )
    this->data.bind( meshFunction.getData() );
 }
 
+// Binds this->data to `data` from `offset` on, sized by the entity count of the current mesh.
+// The mesh pointer itself is not modified by this overload.
+template< typename Mesh,
+          int MeshEntityDimension,
+          typename Real >
+   template< typename Vector >
+void MeshFunction< Mesh, MeshEntityDimension, Real >::bind( const Vector& data, const IndexType& offset )
+{
+   // `data` must provide at least getEntitiesCount() elements starting at `offset`.
+   TNL_ASSERT_GE( data.getSize(), offset + this->meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(),
+                  "The input vector is not large enough for binding to the mesh function." );
+   this->data.bind( data, offset, this->getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+}
+
 template< typename Mesh,
           int MeshEntityDimension,
           typename Real >
@@ -204,9 +215,9 @@ bind( const MeshPointer& meshPointer,
       const Vector& data,
       const IndexType& offset )
 {
-   TNL_ASSERT_GE( data.getSize(), offset + meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
-                  "The input vector is not large enough for binding to the mesh function." );    
-   
+   TNL_ASSERT_GE( data.getSize(), offset + meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(),
+                  "The input vector is not large enough for binding to the mesh function." );
+
    setupSynchronizer(meshPointer->getDistributedMesh());
    this->meshPointer=meshPointer;
    this->data.bind( data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
@@ -219,11 +230,11 @@ template< typename Mesh,
 void
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 bind( const MeshPointer& meshPointer,
-      const SharedPointer< Vector >& data,
+      const Pointers::SharedPointer<  Vector >& data,
       const IndexType& offset )
 {
-   TNL_ASSERT_GE( data->getSize(), offset + meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
-                   "The input vector is not large enough for binding to the mesh function." );      
+   TNL_ASSERT_GE( data->getSize(), offset + meshPointer->template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(),
+                  "The input vector is not large enough for binding to the mesh function." );
 
    setupSynchronizer(meshPointer->getDistributedMesh());
    this->meshPointer=meshPointer;
@@ -402,8 +413,8 @@ MeshFunction< Mesh, MeshEntityDimension, Real >&
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 operator = ( const Function& f )
 {
-   DevicePointer< ThisType > thisDevicePtr( *this );
-   DevicePointer< typename std::add_const< Function >::type > fDevicePtr( f );
+   Pointers::DevicePointer< ThisType > thisDevicePtr( *this );
+   Pointers::DevicePointer< typename std::add_const< Function >::type > fDevicePtr( f );
    MeshFunctionEvaluator< ThisType, Function >::evaluate( thisDevicePtr, fDevicePtr );
    return *this;
 }
@@ -416,8 +427,8 @@ MeshFunction< Mesh, MeshEntityDimension, Real >&
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 operator += ( const Function& f )
 {
-   DevicePointer< ThisType > thisDevicePtr( *this );
-   DevicePointer< typename std::add_const< Function >::type > fDevicePtr( f );
+   Pointers::DevicePointer< ThisType > thisDevicePtr( *this );
+   Pointers::DevicePointer< typename std::add_const< Function >::type > fDevicePtr( f );
    MeshFunctionEvaluator< ThisType, Function >::evaluate( thisDevicePtr, fDevicePtr, 1.0, 1.0 );
    return *this;
 }
@@ -430,8 +441,8 @@ MeshFunction< Mesh, MeshEntityDimension, Real >&
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 operator -= ( const Function& f )
 {
-   DevicePointer< ThisType > thisDevicePtr( *this );
-   DevicePointer< typename std::add_const< Function >::type > fDevicePtr( f );
+   Pointers::DevicePointer< ThisType > thisDevicePtr( *this );
+   Pointers::DevicePointer< typename std::add_const< Function >::type > fDevicePtr( f );
    MeshFunctionEvaluator< ThisType, Function >::evaluate( thisDevicePtr, fDevicePtr, 1.0, -1.0 );
    return *this;
 }
@@ -463,11 +474,10 @@ bool
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 save( File& file ) const
 {
-   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
-               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT_EQ( this->data.getSize(), this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(),
+                  "Size of the mesh function data does not match the mesh." );
    if( ! Object::save( file ) )
-      return false;              
+      return false;
    return this->data.save( file );
 }
 
@@ -479,12 +489,12 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 load( File& file )
 {
    if( ! Object::load( file ) )
-      return false;   
+      return false;
    if( ! this->data.load( file ) )
       return false;
    const IndexType meshSize = this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >();
    if( this->data.getSize() != meshSize )
-   {      
+   {
       std::cerr << "Size of the data loaded to the mesh function (" << this->data.getSize() << ") does not fit with the mesh size (" << meshSize << ")." << std::endl;
       return false;
    }
@@ -533,15 +543,17 @@ write( const String& fileName,
 template< typename Mesh,
           int MeshEntityDimension,
           typename Real >
-template< typename CommunicatorType>
+template< typename CommunicatorType,
+          typename PeriodicBoundariesMaskType >
 void
 MeshFunction< Mesh, MeshEntityDimension, Real >:: 
-synchronize( bool periodicBoundaries )
+synchronize( bool periodicBoundaries,
+             const Pointers::SharedPointer< PeriodicBoundariesMaskType, DeviceType >& mask )
 {
     auto distrMesh = this->getMesh().getDistributedMesh();
     if(distrMesh != NULL && distrMesh->isDistributed())
     {
-        this->synchronizer.template synchronize<CommunicatorType>( *this, periodicBoundaries );
+        this->synchronizer.template synchronize<CommunicatorType>( *this, periodicBoundaries, mask );
     }
 }
 
@@ -556,7 +568,17 @@ setupSynchronizer( DistributedMeshType *distributedMesh )
       this->synchronizer.setDistributedGrid( distributedMesh );
 }
 
- 
+// Streams f.getData() into str and returns str so that output calls can be chained.
+template< typename Mesh,
+          int MeshEntityDimension,
+          typename Real >
+std::ostream& operator << ( std::ostream& str, const MeshFunction< Mesh, MeshEntityDimension, Real >& f )
+{
+   str << f.getData();
+   return str;
+}
+
+
 } // namespace Functions
 } // namespace TNL
 
diff --git a/src/TNL/Functions/OperatorFunction.h b/src/TNL/Functions/OperatorFunction.h
index c5dbde42363c27f2d58d53018d7efb41096b76ad..33262fd4c63bf7d0f93a6b44c8a4b78e7d4f4863 100644
--- a/src/TNL/Functions/OperatorFunction.h
+++ b/src/TNL/Functions/OperatorFunction.h
@@ -74,7 +74,7 @@ class OperatorFunction< Operator, MeshFunctionT, void, true, IsAnalytic >
       typedef typename OperatorType::IndexType IndexType;
       typedef typename OperatorType::ExactOperatorType ExactOperatorType;
       typedef MeshFunction< MeshType, OperatorType::getPreimageEntitiesDimension() > PreimageFunctionType;
-      typedef SharedPointer< MeshType, DeviceType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType, DeviceType > MeshPointer;
       
       static constexpr int getEntitiesDimension() { return OperatorType::getImageEntitiesDimension(); };     
       
@@ -154,7 +154,7 @@ class OperatorFunction< Operator, PreimageFunction, void, false, IsAnalytic >
       typedef Functions::MeshFunction< MeshType, Operator::getImageEntitiesDimension() > ImageFunctionType;
       typedef OperatorFunction< Operator, PreimageFunction, void, true > OperatorFunctionType;
       typedef typename OperatorType::ExactOperatorType ExactOperatorType;
-      typedef SharedPointer< MeshType, DeviceType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType, DeviceType > MeshPointer;
       
       static constexpr int getEntitiesDimension() { return OperatorType::getImageEntitiesDimension(); };     
       
@@ -255,7 +255,7 @@ class OperatorFunction< Operator, Function, BoundaryConditions, false, IsAnalyti
  
       typedef Operator OperatorType;
       typedef typename OperatorType::MeshType MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       typedef typename OperatorType::RealType RealType;
       typedef typename OperatorType::DeviceType DeviceType;
       typedef typename OperatorType::IndexType IndexType;
diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h
index 5231dede7574e6904ab7648049cb2609bd9e54b8..5a7e76485339306c4e506202d3574d59571e6b45 100644
--- a/src/TNL/Functions/TestFunction_impl.h
+++ b/src/TNL/Functions/TestFunction_impl.h
@@ -37,6 +37,8 @@
 #include <TNL/Operators/Analytic/Heaviside.h>
 #include <TNL/Operators/Analytic/SmoothHeaviside.h>
 
+#include "TestFunction.h"
+
 namespace TNL {
 namespace Functions {   
 
@@ -423,6 +425,7 @@ TestFunction< FunctionDimension, Real, Device >::
 getPartialDerivative( const PointType& vertex,
           const Real& time ) const
 {
+   TNL_ASSERT_TRUE( this->function, "The test function was not set properly." );
    using namespace TNL::Functions::Analytic;
    using namespace TNL::Operators::Analytic;
    Real scale( 1.0 );
diff --git a/src/TNL/Functions/VectorField.h b/src/TNL/Functions/VectorField.h
index dd347389ae7750487d8c35a151898ae7e9a310b5..a873165b401fa4f8dc10c7c6fac31682cb81dd2b 100644
--- a/src/TNL/Functions/VectorField.h
+++ b/src/TNL/Functions/VectorField.h
@@ -84,9 +84,9 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, Real > >
       
       typedef Mesh MeshType;
       typedef Real RealType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       typedef MeshFunction< MeshType, MeshEntityDimension, RealType > FunctionType;
-      typedef SharedPointer< FunctionType > FunctionPointer;
+      typedef Pointers::SharedPointer<  FunctionType > FunctionPointer;
       typedef typename MeshType::DeviceType DeviceType;
       typedef typename MeshType::GlobalIndexType IndexType;
       typedef VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, RealType > > ThisType;
@@ -201,7 +201,7 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, Real > >
       
       template< typename Vector >
       void bind( const MeshPointer& meshPointer,
-                 const SharedPointer< Vector >& dataPtr,
+                 const Pointers::SharedPointer< Vector >& dataPtr,
                  IndexType offset = 0 )
       {
          TNL_ASSERT_GE( dataPtr->getSize(), offset + Size * this->vectorField[ 0 ]->getDofs( meshPointer ),
diff --git a/src/TNL/Functions/VectorFieldEvaluator_impl.h b/src/TNL/Functions/VectorFieldEvaluator_impl.h
index 56668ef24f5d6a3d8163382a6b251b439cdd22f8..596b52e70628079aed186925a52a980a32128ba9 100644
--- a/src/TNL/Functions/VectorFieldEvaluator_impl.h
+++ b/src/TNL/Functions/VectorFieldEvaluator_impl.h
@@ -119,7 +119,8 @@ evaluateEntities( OutVectorFieldPointer& meshFunction,
    typedef Functions::VectorFieldEvaluatorAssignmentEntitiesProcessor< MeshType, TraverserUserData > AssignmentEntitiesProcessor;
    typedef Functions::VectorFieldEvaluatorAdditionEntitiesProcessor< MeshType, TraverserUserData > AdditionEntitiesProcessor;
    //typedef typename OutVectorField::MeshPointer OutMeshPointer;
-   typedef SharedPointer< TraverserUserData, DeviceType > TraverserUserDataPointer;
+   typedef Pointers::SharedPointer< TraverserUserData, DeviceType > TraverserUserDataPointer;
    
-   SharedPointer< TraverserUserData, DeviceType >
+   // Declare userData through the typedef above so it also uses the Pointers:: qualified type.
+   TraverserUserDataPointer
       userData( &function.template getData< DeviceType >(),
diff --git a/src/TNL/Logger.cpp b/src/TNL/Logger.cpp
index 74a609003d2f5e5fdbff2a262a2964484895478f..988d4ed68edb1c9674b3370a978712bf3901edcd 100644
--- a/src/TNL/Logger.cpp
+++ b/src/TNL/Logger.cpp
@@ -54,30 +54,4 @@ void Logger :: writeCurrentTime( const char* label )
    writeParameter< String >( label, Devices::SystemInfo::getCurrentTime() );
 }
 
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-template void Logger::writeParameter< char* >( const String&,
-                                               const String&,
-                                               const Config::ParameterContainer&,
-                                               int );
-template void Logger::writeParameter< double >( const String&,
-                                                const String&,
-                                                const Config::ParameterContainer&,
-                                                int );
-template void Logger::writeParameter< int >( const String&,
-                                             const String&,
-                                             const Config::ParameterContainer&,
-                                             int );
-
-// TODO: fix this
-//template void Logger :: WriteParameter< char* >( const char*,
-//                                                 const char*&,
-//                                                 int );
-template void Logger::writeParameter< double >( const String&,
-                                                const double&,
-                                                int );
-template void Logger::writeParameter< int >( const String&,
-                                             const int&,
-                                             int );
-#endif
-
 } // namespace TNL
diff --git a/src/TNL/Logger.h b/src/TNL/Logger.h
index 791398649d8d61f89a984cbb8255bb0be22a69d9..5b7fda238a90468ad4aedd984eb55b40d3dd7077 100644
--- a/src/TNL/Logger.h
+++ b/src/TNL/Logger.h
@@ -53,33 +53,3 @@ class Logger
 } // namespace TNL
 
 #include <TNL/Logger_impl.h>
-
-namespace TNL {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-extern template void Logger::writeParameter< char* >( const String&,
-                                                      const String&,
-                                                      const Config::ParameterContainer&,
-                                                      int );
-extern template void Logger::writeParameter< double >( const String&,
-                                                       const String&,
-                                                       const Config::ParameterContainer&,
-                                                       int );
-extern template void Logger::writeParameter< int >( const String&,
-                                                    const String&,
-                                                    const Config::ParameterContainer&,
-                                                    int );
-
-// TODO: fix this
-//extern template void Logger :: WriteParameter< char* >( const char*,
-//                                                        const char*&,
-//                                                        int );
-extern template void Logger::writeParameter< double >( const String&,
-                                                       const double&,
-                                                       int );
-extern template void Logger::writeParameter< int >( const String&,
-                                                    const int&,
-                                                    int );
-#endif
-
-} // namespace TNL
diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h
index c6d7dd5b7f48806635ddb9b49c9b3412e92f51cc..f6d4c6d31b895476e16bc2ce2616a9bc77fce505 100644
--- a/src/TNL/Matrices/CSR.h
+++ b/src/TNL/Matrices/CSR.h
@@ -164,10 +164,10 @@ public:
    void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   bool performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+   bool performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    // copy assignment
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 9e22c5a9d6abd27be5523e1428aa318dcd802f42..1516e932231c900c6e56b5442b88133cd5267a1a 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -494,10 +494,10 @@ void CSR< Real, Device, Index >::getTransposition( const CSR< Real2, Device, Ind
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
-bool CSR< Real, Device, Index >::performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+bool CSR< Real, Device, Index >::performSORIteration( const Vector1& b,
                                                       const IndexType row,
-                                                      Vector& x,
+                                                      Vector2& x,
                                                       const RealType& omega ) const
 {
    TNL_ASSERT( row >=0 && row < this->getRows(),
diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h
index ba14092163815ca30e7f296705d00013ed84e124..8c4a47a320b6a38fff9ab7c9622f79c3caae6b4b 100644
--- a/src/TNL/Matrices/ChunkedEllpack.h
+++ b/src/TNL/Matrices/ChunkedEllpack.h
@@ -224,10 +224,10 @@ public:
    void getTransposition( const ChunkedEllpack< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   bool performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+   bool performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    // copy assignment
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 5c5c71543c82c10cc2da55728fd7352d83b943b0..1a47fe4e608fa2d5087eb404730356517316a913 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -1154,11 +1154,11 @@ void ChunkedEllpack< Real, Device, Index >::getTransposition( const ChunkedEllpa
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
-bool ChunkedEllpack< Real, Device, Index >::performSORIteration( const Vector& b,
-                                                                                    const IndexType row,
-                                                                                    Vector& x,
-                                                                                    const RealType& omega ) const
+   template< typename Vector1, typename Vector2 >
+bool ChunkedEllpack< Real, Device, Index >::performSORIteration( const Vector1& b,
+                                                                 const IndexType row,
+                                                                 Vector2& x,
+                                                                 const RealType& omega ) const
 {
    TNL_ASSERT( row >=0 && row < this->getRows(),
               std::cerr << "row = " << row
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 3904f5c059b210ae9b61aa0f1455d4a2ca762964..2de30b3f96f5cf830f4f8c476bbec8025b00ad66 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -87,6 +87,14 @@ public:
 
    void setValue( const RealType& v );
 
+   __cuda_callable__
+   Real& operator()( const IndexType row,
+                     const IndexType column );
+
+   __cuda_callable__
+   const Real& operator()( const IndexType row,
+                           const IndexType column ) const;
+
    __cuda_callable__
    bool setElementFast( const IndexType row,
                         const IndexType column,
@@ -177,10 +185,10 @@ public:
    void getTransposition( const Matrix& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   void performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+   void performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    // copy assignment
diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h
index 64d154779d7eaa2cc2ab7da0298c126015472703..32958f08b2e2551076f5a5e50dbdbc3cff50ba13 100644
--- a/src/TNL/Matrices/Dense_impl.h
+++ b/src/TNL/Matrices/Dense_impl.h
@@ -151,6 +151,33 @@ void Dense< Real, Device, Index >::setValue( const Real& value )
 }
 
 
+// Mutable access to the element at ( row, column ); the indices are validated by TNL_ASSERT.
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Real& Dense< Real, Device, Index >::operator()( const IndexType row, const IndexType column )
+{
+   // IndexType need not be int, so the values are widened to long long to match the %lld conversions.
+   TNL_ASSERT( row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns(),
+              printf( " row = %lld, column = %lld, this->getRows = %lld, this->getColumns() = %lld \n", ( long long ) row, ( long long ) column, ( long long ) this->getRows(), ( long long ) this->getColumns() ) );
+   return this->values.operator[]( this->getElementIndex( row, column ) );
+}
+
+// Read-only access to the element at ( row, column ); the indices are validated by TNL_ASSERT.
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const Real& Dense< Real, Device, Index >::operator()( const IndexType row, const IndexType column ) const
+{
+   // IndexType need not be int, so the values are widened to long long to match the %lld conversions.
+   TNL_ASSERT( row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns(),
+              printf( " row = %lld, column = %lld, this->getRows = %lld, this->getColumns() = %lld \n", ( long long ) row, ( long long ) column, ( long long ) this->getRows(), ( long long ) this->getColumns() ) );
+   return this->values.operator[]( this->getElementIndex( row, column ) );
+}
+
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -295,7 +322,7 @@ template< typename Real,
           typename Index >
 __cuda_callable__
 const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row,
-                                                            const IndexType column ) const
+                                                          const IndexType column ) const
 {
    TNL_ASSERT( row >= 0 && row < this->getRows() &&
               column >= 0 && column < this->getColumns(),
@@ -847,11 +874,11 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
-void Dense< Real, Device, Index >::performSORIteration( const Vector& b,
-                                                                 const IndexType row,
-                                                                 Vector& x,
-                                                                 const RealType& omega ) const
+   template< typename Vector1, typename Vector2 >
+void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
+                                                        const IndexType row,
+                                                        Vector2& x,
+                                                        const RealType& omega ) const
 {
    RealType sum( 0.0 ), diagonalValue;
    for( IndexType i = 0; i < this->getColumns(); i++ )
diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h
index 3b4b00b3e0a30bb3fa3298149e7c37f02699b317..38333685bfbc59cd94dec2197463ca40557a57e5 100644
--- a/src/TNL/Matrices/Ellpack.h
+++ b/src/TNL/Matrices/Ellpack.h
@@ -161,10 +161,10 @@ public:
    void getTransposition( const Ellpack< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   bool performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+   bool performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    template< typename Vector >
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index be72eb42e354f1b59676871a4426cd820d78eee7..9801b6bcac54bdff89337428d3b83968ebf3759a 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -540,11 +540,11 @@ void Ellpack< Real, Device, Index >::getTransposition( const Ellpack< Real2, Dev
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
-bool Ellpack< Real, Device, Index > :: performSORIteration( const Vector& b,
-                                                                           const IndexType row,
-                                                                           Vector& x,
-                                                                           const RealType& omega ) const
+   template< typename Vector1, typename Vector2 >
+bool Ellpack< Real, Device, Index > :: performSORIteration( const Vector1& b,
+                                                            const IndexType row,
+                                                            Vector2& x,
+                                                            const RealType& omega ) const
 {
    TNL_ASSERT( row >=0 && row < this->getRows(),
               std::cerr << "row = " << row
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 355ad05a80ca64e084d1fe4a3c69ca5b446f4c85..3271597f6eac18b7fa15a92cfd2e4331ef8a87cb 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -38,7 +38,9 @@ public:
 
    virtual IndexType getRowLength( const IndexType row ) const = 0;
 
-   virtual void getCompressedRowLengths( Containers::Vector< IndexType, DeviceType, IndexType >& rowLengths ) const;
+   // TODO: implementation is not parallel
+   // TODO: it would be nice if padding zeros could be stripped
+   virtual void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
 
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const Matrix< Real2, Device2, Index2 >& matrix );
@@ -88,6 +90,7 @@ public:
    
    ValuesVector& getValues();
 
+   // TODO: parallelize and optimize for sparse matrices
    template< typename Matrix >
    bool operator == ( const Matrix& matrix ) const;
 
diff --git a/src/TNL/Matrices/MatrixSetter.h b/src/TNL/Matrices/MatrixSetter.h
index 8dd15f6a75c4079c20f6801168c4a0ec81da12be..ccc4e5fb77f3ff9502d2813bf13c70903d63ff12 100644
--- a/src/TNL/Matrices/MatrixSetter.h
+++ b/src/TNL/Matrices/MatrixSetter.h
@@ -16,7 +16,7 @@ namespace Matrices {
 template< typename DifferentialOperator,
           typename BoundaryConditions,
           typename CompressedRowLengthsVector >
-class MatrixSetterTraversalUserData
+class MatrixSetterTraverserUserData
 {
    public:
       
@@ -28,7 +28,7 @@ class MatrixSetterTraversalUserData
 
       CompressedRowLengthsVector* rowLengths;
 
-      MatrixSetterTraversalUserData( const DifferentialOperator* differentialOperator,
+      MatrixSetterTraverserUserData( const DifferentialOperator* differentialOperator,
                                      const BoundaryConditions* boundaryConditions,
                                      CompressedRowLengthsVector* rowLengths )
       : differentialOperator( differentialOperator ),
@@ -46,15 +46,15 @@ class MatrixSetter
 {
    public:
    typedef Mesh MeshType;
-   typedef SharedPointer< MeshType > MeshPointer;
+   typedef Pointers::SharedPointer<  MeshType > MeshPointer;
    typedef typename MeshType::DeviceType DeviceType;
    typedef typename CompressedRowLengthsVector::RealType IndexType;
-   typedef MatrixSetterTraversalUserData< DifferentialOperator,
+   typedef MatrixSetterTraverserUserData< DifferentialOperator,
                                           BoundaryConditions,
-                                          CompressedRowLengthsVector > TraversalUserData;
-   typedef SharedPointer< DifferentialOperator, DeviceType > DifferentialOperatorPointer;
-   typedef SharedPointer< BoundaryConditions, DeviceType > BoundaryConditionsPointer;
-   typedef SharedPointer< CompressedRowLengthsVector, DeviceType > CompressedRowLengthsVectorPointer;
+                                          CompressedRowLengthsVector > TraverserUserData;
+   typedef Pointers::SharedPointer<  DifferentialOperator, DeviceType > DifferentialOperatorPointer;
+   typedef Pointers::SharedPointer<  BoundaryConditions, DeviceType > BoundaryConditionsPointer;
+   typedef Pointers::SharedPointer<  CompressedRowLengthsVector, DeviceType > CompressedRowLengthsVectorPointer;
 
    template< typename EntityType >
    void getCompressedRowLengths( const MeshPointer& meshPointer,
@@ -62,14 +62,14 @@ class MatrixSetter
                                   const BoundaryConditionsPointer& boundaryConditionsPointer,
                                   CompressedRowLengthsVectorPointer& rowLengthsPointer ) const;
 
-   class TraversalBoundaryEntitiesProcessor
+   class TraverserBoundaryEntitiesProcessor
    {
       public:
 
          template< typename EntityType >
          __cuda_callable__
          static void processEntity( const MeshType& mesh,
-                                    TraversalUserData& userData,                                    
+                                    TraverserUserData& userData,
                                     const EntityType& entity )
          {
             ( *userData.rowLengths )[ entity.getIndex() ] =
@@ -78,14 +78,14 @@ class MatrixSetter
 
    };
 
-   class TraversalInteriorEntitiesProcessor
+   class TraverserInteriorEntitiesProcessor
    {
       public:
          
          template< typename EntityType >
          __cuda_callable__
          static void processEntity( const MeshType& mesh,
-                                    TraversalUserData& userData,
+                                    TraverserUserData& userData,
                                     const EntityType& entity )
          {
             ( *userData.rowLengths )[ entity.getIndex() ] =
@@ -114,9 +114,9 @@ class MatrixSetter< Meshes::Grid< Dimension, Real, Device, Index >,
    typedef typename MeshType::DeviceType DeviceType;
    typedef typename CompressedRowLengthsVector::RealType IndexType;
    typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef MatrixSetterTraversalUserData< DifferentialOperator,
+   typedef MatrixSetterTraverserUserData< DifferentialOperator,
                                              BoundaryConditions,
-                                             CompressedRowLengthsVector > TraversalUserData;
+                                             CompressedRowLengthsVector > TraverserUserData;
 
    template< typename EntityType >
    void getCompressedRowLengths( const MeshType& mesh,
@@ -124,14 +124,14 @@ class MatrixSetter< Meshes::Grid< Dimension, Real, Device, Index >,
                        const BoundaryConditions& boundaryConditions,
                        CompressedRowLengthsVector& rowLengths ) const;
 
-   class TraversalBoundaryEntitiesProcessor
+   class TraverserBoundaryEntitiesProcessor
    {
       public:
 
          template< typename EntityType >
          __cuda_callable__
          static void processEntity( const MeshType& mesh,
-                                    TraversalUserData& userData,
+                                    TraverserUserData& userData,
                                     const EntityType& entity )
          {
             ( *userData.rowLengths )[ entity.getIndex() ] =
@@ -140,14 +140,14 @@ class MatrixSetter< Meshes::Grid< Dimension, Real, Device, Index >,
 
    };
 
-   class TraversalInteriorEntitiesProcessor
+   class TraverserInteriorEntitiesProcessor
    {
       public:
  
          template< typename EntityType >
          __cuda_callable__
          static void processEntity( const MeshType& mesh,
-                                    TraversalUserData& userData,
+                                    TraverserUserData& userData,
                                     const EntityType& entity )
          {
             ( *userData.rowLengths )[ entity.getIndex() ] =
diff --git a/src/TNL/Matrices/MatrixSetter_impl.h b/src/TNL/Matrices/MatrixSetter_impl.h
index 98f39be7087c890f29edd8ce1e3d4777cb875479..6f8993f772405f8f22c9f614913a0edd1c4ba927 100644
--- a/src/TNL/Matrices/MatrixSetter_impl.h
+++ b/src/TNL/Matrices/MatrixSetter_impl.h
@@ -28,17 +28,17 @@ getCompressedRowLengths( const MeshPointer& meshPointer,
                           CompressedRowLengthsVectorPointer& rowLengthsPointer ) const
 {
    {
-      SharedPointer< TraversalUserData, DeviceType >
+      TraverserUserData
          userData( &differentialOperatorPointer.template getData< DeviceType >(),
                    &boundaryConditionsPointer.template getData< DeviceType >(),
                    &rowLengthsPointer.template modifyData< DeviceType >() );
-      Meshes::Traverser< MeshType, EntityType > meshTraversal;
-      meshTraversal.template processBoundaryEntities< TraversalUserData,
-                                                      TraversalBoundaryEntitiesProcessor >
+      Meshes::Traverser< MeshType, EntityType > meshTraverser;
+      meshTraverser.template processBoundaryEntities< TraverserUserData,
+                                                      TraverserBoundaryEntitiesProcessor >
                                                     ( meshPointer,
                                                       userData );
-      meshTraversal.template processInteriorEntities< TraversalUserData,
-                                                      TraversalInteriorEntitiesProcessor >
+      meshTraverser.template processInteriorEntities< TraverserUserData,
+                                                      TraverserInteriorEntitiesProcessor >
                                                     ( meshPointer,
                                                       userData );
    }
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h
index 1f8c63705bff5a1fa3f5b92980197ceb4ebbac3d..e9cf162cb52dd9f36c53b2b2367b7a29bfa6bfa8 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix_impl.h
@@ -40,7 +40,7 @@ void Matrix< Real, Device, Index >::setDimensions( const IndexType rows,
 template< typename Real,
           typename Device,
           typename Index >
-void Matrix< Real, Device, Index >::getCompressedRowLengths( Containers::Vector< IndexType, DeviceType, IndexType >& rowLengths ) const
+void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
 {
    rowLengths.setSize( this->getRows() );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -180,6 +180,7 @@ void Matrix< Real, Device, Index >::print( std::ostream& str ) const
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 const Index&
 Matrix< Real, Device, Index >::
 getNumberOfColors() const
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 28de74b8ac64b9fdf002aa71a4ca6a34a6ad4b26..9b8f18779ceb9afa15e3e27f3610a2b0fa23fde6 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -177,10 +177,10 @@ public:
    void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   bool performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+   bool performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    // copy assignment
diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h
index 47d827b93df0904b568c234cc8bfbea602479e46..5f7228d698db1a47dbc62f2b540c08b1e3f9b86c 100644
--- a/src/TNL/Matrices/Multidiagonal_impl.h
+++ b/src/TNL/Matrices/Multidiagonal_impl.h
@@ -576,11 +576,11 @@ void Multidiagonal< Real, Device, Index >::getTransposition( const Multidiagonal
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
-bool Multidiagonal< Real, Device, Index > :: performSORIteration( const Vector& b,
-                                                                           const IndexType row,
-                                                                           Vector& x,
-                                                                           const RealType& omega ) const
+   template< typename Vector1, typename Vector2 >
+bool Multidiagonal< Real, Device, Index > :: performSORIteration( const Vector1& b,
+                                                                  const IndexType row,
+                                                                  Vector2& x,
+                                                                  const RealType& omega ) const
 {
    TNL_ASSERT( row >=0 && row < this->getRows(),
               std::cerr << "row = " << row
diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h
index 0557d26ebbd94ee57ab2bcd51bb029f48cbd30c7..815728d7a58d588a1791c1aa80b84bcd81da8f4b 100644
--- a/src/TNL/Matrices/SlicedEllpack.h
+++ b/src/TNL/Matrices/SlicedEllpack.h
@@ -187,10 +187,10 @@ public:
    void getTransposition( const SlicedEllpack< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   bool performSORIteration( const Vector& b,
+   template< typename Vector1, typename Vector2 >
+   bool performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    // copy assignment
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h
index 52288c650fbdadb7c06e2aba8933616fcd89786a..2ff01b49c51943c7411626257a57f273c6880b05 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/SlicedEllpack_impl.h
@@ -562,11 +562,11 @@ template< typename Real,
           typename Device,
           typename Index,
           int SliceSize >
-   template< typename Vector >
-bool SlicedEllpack< Real, Device, Index, SliceSize >::performSORIteration( const Vector& b,
-                                                                                    const IndexType row,
-                                                                                    Vector& x,
-                                                                                    const RealType& omega ) const
+   template< typename Vector1, typename Vector2 >
+bool SlicedEllpack< Real, Device, Index, SliceSize >::performSORIteration( const Vector1& b,
+                                                                           const IndexType row,
+                                                                           Vector2& x,
+                                                                           const RealType& omega ) const
 {
    TNL_ASSERT( row >=0 && row < this->getRows(),
               std::cerr << "row = " << row
diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/SparseRow.h
index b0429c011132dedaf1324985e839c72f1a94666e..7d39b7b2e9508e5973a4f81457a3b1eb58f42cad 100644
--- a/src/TNL/Matrices/SparseRow.h
+++ b/src/TNL/Matrices/SparseRow.h
@@ -49,6 +49,9 @@ class SparseRow
       __cuda_callable__
       const Real& getElementValue( const Index& elementIndex ) const;
 
+      __cuda_callable__
+      Index getLength() const;
+
       void print( std::ostream& str ) const;
 
    protected:
diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/SparseRow_impl.h
index c4b69044bcd27268b3e6df8b53ab71ab379c9349..f6921b15bbf8f282f0dee1a2f6c842a230cf0dd3 100644
--- a/src/TNL/Matrices/SparseRow_impl.h
+++ b/src/TNL/Matrices/SparseRow_impl.h
@@ -98,6 +98,15 @@ getElementValue( const Index& elementIndex ) const
    return this->values[ elementIndex * step ];
 }
 
+template< typename Real, typename Index >
+__cuda_callable__
+Index
+SparseRow< Real, Index >::
+getLength() const   // read-only accessor: hands back the `length` member by value
+{
+   return length;   // NOTE(review): presumably the number of elements in this row -- confirm where `length` is assigned
+}
+
 template< typename Real, typename Index >
 void
 SparseRow< Real, Index >::
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index 61b3b21d68e4488493431faf890afd5c2537a1fe..ba4558d3f989374f65c12ace41391323e5a6adba 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include "Sparse.h"
-#include <TNL/DevicePointer.h>
+#include <TNL/Pointers/DevicePointer.h>
 
 namespace TNL {
 namespace Matrices {
@@ -122,7 +122,8 @@ void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& num
     * Setting a column index to this->columns means that the
     * index is undefined.
     */
-   this->columnIndexes.setValue( this->columns );
+   if( numberOfMatrixElements > 0 )
+      this->columnIndexes.setValue( this->columns );
 }
 
 template< typename Real,
@@ -246,8 +247,8 @@ copySparseMatrix( Matrix1& A, const Matrix2& B )
       typename Matrix1::CompressedRowLengthsVector rowLengths;
       rowLengths.setSize( rows );
 
-      DevicePointer< Matrix1 > Apointer( A );
-      const DevicePointer< const Matrix2 > Bpointer( B );
+      Pointers::DevicePointer< Matrix1 > Apointer( A );
+      const Pointers::DevicePointer< const Matrix2 > Bpointer( B );
 
       // set row lengths
       Devices::Cuda::synchronizeDevice();
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 0769ca83f2f0355ee3ad4c726c601bd16bae8c3f..472cadffcd4194270d5218e3c0ea1415b2c7ae5c 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -167,11 +167,11 @@ public:
    void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector >
+   template< typename Vector1, typename Vector2 >
    __cuda_callable__
-   void performSORIteration( const Vector& b,
+   void performSORIteration( const Vector1& b,
                              const IndexType row,
-                             Vector& x,
+                             Vector2& x,
                              const RealType& omega = 1.0 ) const;
 
    // copy assignment
diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h
index f3c073cd0d65285643df34d8fc2885e802c77e36..66fe9d7e80a80f93a1d16f1e741008f4de6a0787 100644
--- a/src/TNL/Matrices/Tridiagonal_impl.h
+++ b/src/TNL/Matrices/Tridiagonal_impl.h
@@ -535,12 +535,12 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
+   template< typename Vector1, typename Vector2 >
 __cuda_callable__
-void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector& b,
-                                                                       const IndexType row,
-                                                                       Vector& x,
-                                                                       const RealType& omega ) const
+void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
+                                                              const IndexType row,
+                                                              Vector2& x,
+                                                              const RealType& omega ) const
 {
    RealType sum( 0.0 );
    if( row > 0 )
diff --git a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h
index 658009c5632bc5fda9370c3d989e991991e745fa..0b3c7b3638266702374b669459498d2bbb98a540 100644
--- a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h
+++ b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h
@@ -20,6 +20,7 @@ namespace DistributedMeshes {
 
 
 template < typename MeshFunctionType,
+           typename PeriodicBoundariesMaskPointer,
            int dim,
            typename RealType=typename MeshFunctionType::MeshType::RealType,
            typename Device=typename MeshFunctionType::MeshType::DeviceType,
@@ -28,94 +29,136 @@ class BufferEntitiesHelper
 {
 };
 
-//======================================== 1D ====================================================
 
-template < typename MeshFunctionType, typename RealType, typename Device, typename Index >
-class BufferEntitiesHelper<MeshFunctionType,1,RealType,Device,Index>
+template < typename MeshFunctionType,   // specialization for dim == 1: copies a run of cells between a mesh function and a flat buffer
+           typename MaskPointer,
+           typename RealType,
+           typename Device,
+           typename Index >
+class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 1, RealType, Device, Index >
 {
-    public:
-    static void BufferEntities(MeshFunctionType meshFunction, RealType * buffer, Index beginx, Index sizex, bool tobuffer)
-    {
-        auto mesh = meshFunction.getMesh();
-        RealType* meshFunctionData = meshFunction.getData().getData();
-        auto kernel = [tobuffer, mesh, buffer, meshFunctionData, beginx] __cuda_callable__ ( Index j )
-        {
+   public:
+      static void BufferEntities( // copies sizex cells starting at x == beginx, in the direction chosen by tobuffer
+         MeshFunctionType& meshFunction,
+         const MaskPointer& maskPointer,   // optional; when it tests false no masking is applied
+         RealType* buffer,
+         bool isBoundary,   // the mask is consulted only when this is true
+         const Index& beginx,
+         const Index& sizex,
+         bool tobuffer )   // true: mesh function -> buffer, false: buffer -> mesh function
+      {
+         auto mesh = meshFunction.getMesh();
+         RealType* meshFunctionData = meshFunction.getData().getData();
+         const typename MaskPointer::ObjectType* mask( nullptr );   // stays null when no mask was supplied
+         if( maskPointer )
+            mask = &maskPointer.template getData< Device >();   // raw address of the mask object, captured by value in the lambda below
+         auto kernel = [tobuffer, mesh, buffer, isBoundary, meshFunctionData, mask, beginx ] __cuda_callable__ ( Index j )
+         {
             typename MeshFunctionType::MeshType::Cell entity(mesh);
             entity.getCoordinates().x()=beginx+j;
             entity.refresh();
-            if(tobuffer)
-                buffer[j]=meshFunctionData[entity.getIndex()];
-            else
-                meshFunctionData[entity.getIndex()]=buffer[j];
-        };
-        ParallelFor< Device >::exec( 0, sizex, kernel );
-    };  
+            if( ! isBoundary || ! mask || ( *mask )[ entity.getIndex() ] )   // skipped only for a boundary side whose mask entry at this cell is zero
+            {
+               if( tobuffer )
+                  buffer[ j ] = meshFunctionData[ entity.getIndex() ];
+               else
+                  meshFunctionData[ entity.getIndex() ] = buffer[ j ];
+            }
+         };
+         ParallelFor< Device >::exec( 0, sizex, kernel );   // NOTE(review): presumably calls kernel( j ) for j in [0, sizex) -- confirm against ParallelFor
+      };  
 };
 
 
-//======================================== 2D ====================================================
-template <typename MeshFunctionType, typename RealType, typename Device, typename Index  > 
-class BufferEntitiesHelper<MeshFunctionType,2,RealType,Device,Index>
+template< typename MeshFunctionType,   // specialization for dim == 2: copies a rectangle of cells between a mesh function and a flat buffer
+          typename MaskPointer, 
+          typename RealType,
+          typename Device,
+          typename Index  > 
+class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 2, RealType, Device, Index >
 {
-    public:
-    static void BufferEntities(MeshFunctionType meshFunction, RealType * buffer, Index beginx, Index beginy, Index sizex, Index sizey,bool tobuffer)
-    {
-        auto mesh=meshFunction.getMesh();
-        RealType *meshFunctionData=meshFunction.getData().getData();
-        auto kernel = [tobuffer, mesh, buffer, meshFunctionData, beginx, sizex, beginy] __cuda_callable__ ( Index i, Index j )
-        {
+   public:
+      static void BufferEntities(   // copies the sizex-by-sizey rectangle whose first cell is ( beginx, beginy )
+         MeshFunctionType& meshFunction,
+         const MaskPointer& maskPointer,   // optional; when it tests false no masking is applied
+         RealType* buffer,
+         bool isBoundary,   // the mask is consulted only when this is true
+         const Index& beginx,
+         const Index& beginy,
+         const Index& sizex,
+         const Index& sizey,
+         bool tobuffer)   // true: mesh function -> buffer, false: buffer -> mesh function
+      {
+         auto mesh=meshFunction.getMesh();
+         RealType* meshFunctionData = meshFunction.getData().getData();      
+         const typename MaskPointer::ObjectType* mask( nullptr );   // stays null when no mask was supplied
+         if( maskPointer )
+            mask = &maskPointer.template getData< Device >();   // raw address of the mask object, captured by value in the lambda below
+
+         auto kernel = [ tobuffer, mask, mesh, buffer, isBoundary, meshFunctionData, beginx, sizex, beginy] __cuda_callable__ ( Index i, Index j )
+         {
             typename MeshFunctionType::MeshType::Cell entity(mesh);
-            entity.getCoordinates().x()=beginx+j;
-            entity.getCoordinates().y()=beginy+i;				
+            entity.getCoordinates().x() = beginx + j;   // j is the offset along x
+            entity.getCoordinates().y() = beginy + i;				// i is the offset along y
             entity.refresh();
-            if(tobuffer)
-                    buffer[i*sizex+j]=meshFunctionData[entity.getIndex()];
-            else
-                    meshFunctionData[entity.getIndex()]=buffer[i*sizex+j];
-        };
-        
-        ParallelFor2D< Device >::exec( 0, 0, sizey, sizex, kernel );       
-        
-    };
+            if( ! isBoundary || ! mask || ( *mask )[ entity.getIndex() ] )   // skipped only for a boundary side whose mask entry at this cell is zero
+            {
+               if( tobuffer )
+                  buffer[ i * sizex + j ] = meshFunctionData[ entity.getIndex() ];   // buffer slot: x offset varies fastest, rows of length sizex
+               else
+                  meshFunctionData[ entity.getIndex() ] = buffer[ i * sizex + j ];
+            }
+         };
+         ParallelFor2D< Device >::exec( 0, 0, sizey, sizex, kernel );     // NOTE(review): presumably i in [0, sizey), j in [0, sizex) -- confirm against ParallelFor2D
+      };
 };
 
 
-//======================================== 3D ====================================================
-template <typename MeshFunctionType, typename RealType, typename Device, typename Index >
-class BufferEntitiesHelper<MeshFunctionType,3,RealType,Device,Index>
+template< typename MeshFunctionType,   // specialization for dim == 3: copies a box of cells between a mesh function and a flat buffer
+          typename MaskPointer,
+          typename RealType,
+          typename Device,
+          typename Index >
+class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 3, RealType, Device, Index >
 {
-    public:
-    static void BufferEntities(MeshFunctionType meshFunction, RealType * buffer, Index beginx, Index beginy, Index beginz, Index sizex, Index sizey, Index sizez, bool tobuffer)
-    {
-
-        auto mesh=meshFunction.getMesh();
-        RealType * meshFunctionData=meshFunction.getData().getData();
-        auto kernel = [tobuffer, mesh, buffer, meshFunctionData, beginx, sizex, beginy, sizey, beginz] __cuda_callable__ ( Index k, Index i, Index j )
-        {
+   public:
+      static void BufferEntities(   // copies the sizex-by-sizey-by-sizez box whose first cell is ( beginx, beginy, beginz )
+         MeshFunctionType& meshFunction,
+         const MaskPointer& maskPointer,   // optional; when it tests false no masking is applied
+         RealType* buffer,
+         bool isBoundary,   // the mask is consulted only when this is true
+         const Index& beginx,
+         const Index& beginy,
+         const Index& beginz,
+         const Index& sizex,
+         const Index& sizey,
+         const Index& sizez,
+         bool tobuffer)   // true: mesh function -> buffer, false: buffer -> mesh function
+      {
+
+         auto mesh=meshFunction.getMesh();
+         RealType * meshFunctionData=meshFunction.getData().getData();
+         const typename MaskPointer::ObjectType* mask( nullptr );   // stays null when no mask was supplied
+         if( maskPointer )
+            mask = &maskPointer.template getData< Device >();         
+         auto kernel = [ tobuffer, mesh, mask, buffer, isBoundary, meshFunctionData, beginx, sizex, beginy, sizey, beginz] __cuda_callable__ ( Index k, Index i, Index j )
+         {
             typename MeshFunctionType::MeshType::Cell entity(mesh);
-            entity.getCoordinates().x()=beginx+j;
-            entity.getCoordinates().z()=beginz+k;
-            entity.getCoordinates().y()=beginy+i;
+            entity.getCoordinates().x() = beginx + j;   // j is the offset along x
+            entity.getCoordinates().z() = beginz + k;   // k is the offset along z
+            entity.getCoordinates().y() = beginy + i;   // i is the offset along y
             entity.refresh();
-            if(tobuffer)
-                    buffer[k*sizex*sizey+i*sizex+j]=meshFunctionData[entity.getIndex()];
-            else
-                    meshFunctionData[entity.getIndex()]=buffer[k*sizex*sizey+i*sizex+j];
-        };
-
-        ParallelFor3D< Device >::exec( 0, 0, 0, sizez, sizey, sizex, kernel ); 
-
-        /*for(int k=0;k<sizez;k++)
-        {
-            for(int i=0;i<sizey;i++)
+            if( ! isBoundary || ! mask || ( *mask )[ entity.getIndex() ] )   // skipped only for a boundary side whose mask entry at this cell is zero
             {
-                for(int j=0;j<sizex;j++)
-                {
-                        kernel(k,i,j);
-                }
+               if( tobuffer )
+                  buffer[ k * sizex * sizey + i * sizex + j ] = 
+                     meshFunctionData[ entity.getIndex() ];
+               else
+                  meshFunctionData[ entity.getIndex() ] = buffer[ k * sizex * sizey + i * sizex + j ];
             }
-        }*/
-    };
+         };
+         ParallelFor3D< Device >::exec( 0, 0, 0, sizez, sizey, sizex, kernel ); 
+      };
 };
 
 
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp
index 01cf6e58e76dcdc3ffbe286dd39bf1b3f27523c6..90e877415477d7f51503dac6b01a1ed87fb305b6 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp
@@ -380,8 +380,6 @@ void
 DistributedMesh< Grid< Dimension, Real, Device, Index > >::
 setupNeighbors()
 {
-   int *neighbors = this->neighbors;
-
    for( int i = 0; i < getNeighborsCount(); i++ )
    {
       auto direction = Directions::template getXYZ< Dimension >( i );
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h
index fa3aee29597fd9a0f11ee8a694f5c19aeeeb002a..ed43e38b265d353936bfb09f907096695cea5af4 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h
@@ -50,7 +50,7 @@ class DistributedGridIO<Functions::MeshFunction<MeshType>,LocalCopy,Device>
         CoordinatesType localSize=distrGrid->getLocalSize();
         CoordinatesType localBegin=distrGrid->getLocalBegin();
  
-        SharedPointer<MeshType> newMesh;
+        Pointers::SharedPointer<MeshType> newMesh;
         newMesh->setDimensions(localSize);
         newMesh->setSpaceSteps(spaceSteps);
         CoordinatesType newOrigin;
@@ -96,7 +96,7 @@ class DistributedGridIO<Functions::MeshFunction<MeshType>,LocalCopy,Device>
         CoordinatesType localSize=distrGrid->getLocalSize();
         CoordinatesType localBegin=distrGrid->getLocalBegin();
 
-        SharedPointer<MeshType> newMesh;
+        Pointers::SharedPointer<MeshType> newMesh;
         newMesh->setDimensions(localSize);
         newMesh->setSpaceSteps(spaceSteps);
         CoordinatesType newOrigin;
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_1D.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_1D.h
index 56bc36b37a0dde70b7b57df42c76bbd62bf4ab23..e7968ba243c74845158605218acf669d501f3bfc 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_1D.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_1D.h
@@ -39,11 +39,10 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 1, GridReal, D
    public:
       using RealType = Real;
       typedef typename Grid< 1, GridReal, Device, Index >::Cell Cell;
-      typedef typename Functions::MeshFunction< Grid< 1, GridReal, Device, Index >,EntityDimension, RealType> MeshFunctionType;
+      // FIXME: clang does not like this (incomplete type error)
+//      typedef typename Functions::MeshFunction< Grid< 1, GridReal, Device, Index >,EntityDimension, RealType> MeshFunctionType;
       typedef typename Grid< 1, GridReal, Device, Index >::DistributedMeshType DistributedGridType;
       typedef typename DistributedGridType::CoordinatesType CoordinatesType;
-      //template< typename Real_ >
-      //using BufferEntitiesHelperType = BufferEntitiesHelper< MeshFunctionType, 1, Real_, Device >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
 
       DistributedMeshSynchronizer()
@@ -76,12 +75,15 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 1, GridReal, D
 
       };
 
-      template<typename CommunicatorType>
+      template< typename CommunicatorType,
+                typename MeshFunctionType,
+                typename PeriodicBoundariesMaskPointer = Pointers::SharedPointer< MeshFunctionType > >
       void synchronize( MeshFunctionType &meshFunction,
-                        bool periodicBoundaries = false )
+                        bool periodicBoundaries = false,
+                        const PeriodicBoundariesMaskPointer& mask = PeriodicBoundariesMaskPointer( nullptr ) )
       {
          TNL_ASSERT_TRUE( isSet, "Synchronizer is not set, but used to synchronize" );
-
+         
          if( !distributedGrid->isDistributed() )
             return;
 
@@ -110,7 +112,8 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 1, GridReal, D
                       leftSource, rightSource,
                       lowerOverlap, upperOverlap,
                       neighbors,
-                      periodicBoundaries );
+                      periodicBoundaries,
+                      PeriodicBoundariesMaskPointer( nullptr ) ); // the mask is used only when receiving data 
 
          //async send
          typename CommunicatorType::Request requests[ 4 ];
@@ -157,24 +160,33 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 1, GridReal, D
             lowerOverlap,
             upperOverlap,
             neighbors,
-            periodicBoundaries );
+            periodicBoundaries,
+            mask );
       }
       
    private:
-      template <typename Real_ >
-      void copyBuffers( MeshFunctionType meshFunction, TNL::Containers::Array<Real_,Device>* buffers, bool toBuffer,
+      template< typename Real_,
+                typename MeshFunctionType,
+                typename PeriodicBoundariesMaskPointer >
+      void copyBuffers( // copies the Left and Right buffers to or from meshFunction through BufferEntitiesHelper
+         MeshFunctionType& meshFunction,
+         TNL::Containers::Array<Real_,Device>* buffers,   // indexed with Left and Right below
+         bool toBuffer,   // forwarded unchanged as the helper's last argument
          int left, int right,
          const SubdomainOverlapsType& lowerOverlap,
          const SubdomainOverlapsType& upperOverlap,
          const int* neighbors,
-         bool periodicBoundaries )
+         bool periodicBoundaries,   // when true, sides without a neighbor are copied as well
+         const PeriodicBoundariesMaskPointer& mask )   // forwarded unchanged to the helper
       
       {
-         typedef BufferEntitiesHelper< MeshFunctionType, 1, Real_, Device > Helper;
-         if( neighbors[ Left ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Left ].getData(), left, lowerOverlap.x(), toBuffer );
-         if( neighbors[ Right ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Right ].getData(), right, upperOverlap.x(), toBuffer );
+         typedef BufferEntitiesHelper< MeshFunctionType, PeriodicBoundariesMaskPointer, 1, Real_, Device > Helper;
+         bool leftIsBoundary = ( neighbors[ Left ] == -1 );   // a neighbor entry of -1 is what this code treats as a boundary side
+         bool rightIsBoundary = ( neighbors[ Right ] == -1 );
+         if( ! leftIsBoundary || periodicBoundaries )   // boundary sides are copied only when periodicBoundaries is set
+            Helper::BufferEntities( meshFunction, mask, buffers[ Left ].getData(), leftIsBoundary, left, lowerOverlap.x(), toBuffer );   // start index `left`, count lowerOverlap.x()
+         if( ! rightIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Right ].getData(), rightIsBoundary, right, upperOverlap.x(), toBuffer );   // start index `right`, count upperOverlap.x()
       }
 
       Containers::Array<RealType, Device> sendBuffers[ 2 ], receiveBuffers[ 2 ];
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_2D.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_2D.h
index fa4c5f6ac125dac58deba55c9abca8f950f563bc..ba61f2b3d0190a3b7baabea1e094f7b475713d55 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_2D.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_2D.h
@@ -39,12 +39,10 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 2, GridReal, D
     public:
       
       typedef typename Grid< 2, GridReal, Device, Index >::Cell Cell;
-      typedef typename Functions::MeshFunction< Grid< 2, GridReal, Device, Index >,EntityDimension, RealType> MeshFunctionType;
+      // FIXME: clang does not like this (incomplete type error)
+//      typedef typename Functions::MeshFunction< Grid< 2, GridReal, Device, Index >,EntityDimension, RealType> MeshFunctionType;
       typedef typename Grid< 2, GridReal, Device, Index >::DistributedMeshType DistributedGridType; 
-      typedef typename MeshFunctionType::RealType Real;
       typedef typename DistributedGridType::CoordinatesType CoordinatesType;
-      template< typename Real_ >
-      using BufferEntitiesHelperType = BufferEntitiesHelper< MeshFunctionType, 2, Real_, Device >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
 
 
@@ -88,9 +86,12 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 2, GridReal, D
 
       }
 
-      template<typename CommunicatorType>
+      template< typename CommunicatorType,
+                typename MeshFunctionType,
+                typename PeriodicBoundariesMaskPointer = Pointers::SharedPointer< MeshFunctionType > >
       void synchronize( MeshFunctionType &meshFunction,
-                        bool periodicBoundaries = false )
+                        bool periodicBoundaries = false,
+                        const PeriodicBoundariesMaskPointer& mask = PeriodicBoundariesMaskPointer( nullptr ) )
       {
 
          TNL_ASSERT_TRUE( isSet, "Synchronizer is not set, but used to synchronize" );
@@ -136,7 +137,8 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 2, GridReal, D
             xCenter, yCenter,
             lowerOverlap, upperOverlap, localSize,
             neighbors,
-            periodicBoundaries );
+            periodicBoundaries,
+            PeriodicBoundariesMaskPointer( nullptr ) ); // the mask is used only when receiving data
 
          //async send and receive
          typename CommunicatorType::Request requests[ 16 ];
@@ -166,39 +168,55 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 2, GridReal, D
               xCenter, yCenter,
               lowerOverlap, upperOverlap, localSize,
               neighbors,
-              periodicBoundaries );
+              periodicBoundaries,
+              mask );
       }
     
    private:
       
-      template< typename Real_ >
-      void copyBuffers(MeshFunctionType meshFunction, Containers::Array<Real_, Device, Index> * buffers, bool toBuffer,
-                       int left, int right, int up, int down,
-                       int xcenter, int ycenter,
-                       const CoordinatesType& lowerOverlap,
-                       const CoordinatesType& upperOverlap,
-                       const CoordinatesType& localSize,
-                       const int *neighbors,
-                       bool periodicBoundaries )
+      template< typename Real_,
+                typename MeshFunctionType,
+                typename PeriodicBoundariesMaskPointer >
+      void copyBuffers( 
+         MeshFunctionType& meshFunction,
+         Containers::Array<Real_, Device, Index>* buffers,
+         bool toBuffer,
+         int left, int right, int up, int down,
+         int xcenter, int ycenter,
+         const CoordinatesType& lowerOverlap,
+         const CoordinatesType& upperOverlap,
+         const CoordinatesType& localSize,
+         const int *neighbors,
+         bool periodicBoundaries,
+         const PeriodicBoundariesMaskPointer& mask )
       {
          // TODO: SWAP up and down
-         using Helper = BufferEntitiesHelper< MeshFunctionType, 2, Real_, Device >;
-         if( neighbors[ Left ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Left ].getData(), left, ycenter, lowerOverlap.x(), localSize.y(), toBuffer );
-         if( neighbors[ Right ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Right ].getData(), right, ycenter, upperOverlap.x(), localSize.y(), toBuffer );
-         if( neighbors[ Up ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Up ].getData(), xcenter, up, localSize.x(), lowerOverlap.y(), toBuffer );
-         if( neighbors[ Down ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Down ].getData(), xcenter, down, localSize.x(), upperOverlap.y(), toBuffer );
-         if( neighbors[ UpLeft ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ UpLeft ].getData(), left, up, lowerOverlap.x(), lowerOverlap.y(), toBuffer );
-         if( neighbors[ UpRight ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ UpRight ].getData(), right, up, upperOverlap.x(), lowerOverlap.y(), toBuffer );
-         if( neighbors[ DownLeft ] != -1 || periodicBoundaries )        
-            Helper::BufferEntities( meshFunction, buffers[ DownLeft ].getData(), left, down, lowerOverlap.x(), upperOverlap.y(), toBuffer );
-         if( neighbors[ DownRight ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ DownRight ].getData(), right, down, upperOverlap.x(), upperOverlap.y(), toBuffer );
+         bool leftIsBoundary = ( neighbors[ Left ] == -1 );
+         bool rightIsBoundary = ( neighbors[ Right ] == -1 );
+         bool upIsBoundary = ( neighbors[ Up ] == -1 );
+         bool downIsBoundary = ( neighbors[ Down ] == -1 );
+         bool upLeftIsBoundary = ( neighbors[ UpLeft ] == -1 );
+         bool upRightIsBoundary = ( neighbors[ UpRight ] == -1 );
+         bool downLeftIsBoundary = ( neighbors[ DownLeft ] == -1 );
+         bool downRightIsBoundary = ( neighbors[ DownRight ] == -1 );
+         
+         using Helper = BufferEntitiesHelper< MeshFunctionType, PeriodicBoundariesMaskPointer, 2, Real_, Device >;
+         if( ! leftIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Left      ].getData(), leftIsBoundary,      left,    ycenter, lowerOverlap.x(), localSize.y(),    toBuffer );
+         if( ! rightIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Right     ].getData(), rightIsBoundary,     right,   ycenter, upperOverlap.x(), localSize.y(),    toBuffer );
+         if( ! upIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Up        ].getData(), upIsBoundary,        xcenter, up,      localSize.x(),    lowerOverlap.y(), toBuffer );
+         if( ! downIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Down      ].getData(), downIsBoundary,      xcenter, down,    localSize.x(),    upperOverlap.y(), toBuffer );
+         if( ! upLeftIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ UpLeft    ].getData(), upLeftIsBoundary,    left,    up,      lowerOverlap.x(), lowerOverlap.y(), toBuffer );
+         if( ! upRightIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ UpRight   ].getData(), upRightIsBoundary,   right,   up,      upperOverlap.x(), lowerOverlap.y(), toBuffer );
+         if( ! downLeftIsBoundary || periodicBoundaries )        
+            Helper::BufferEntities( meshFunction, mask, buffers[ DownLeft  ].getData(), downLeftIsBoundary,  left,    down,    lowerOverlap.x(), upperOverlap.y(), toBuffer );
+         if( ! downRightIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ DownRight ].getData(), downRightIsBoundary, right,   down,    upperOverlap.x(), upperOverlap.y(), toBuffer );
       }
       
       DistributedGridType *distributedGrid;
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_3D.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_3D.h
index c6970961edfed3646e8a795fe80296b9a6fa4c94..8715ef359d42a857c18143ddb4cb4d10aba3105e 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_3D.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer_3D.h
@@ -38,12 +38,10 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 3, GridReal, D
 
    public:
       typedef typename Grid< 3, GridReal, Device, Index >::Cell Cell;
-      typedef typename Functions::MeshFunction< Grid< 3, GridReal, Device, Index >,EntityDimension, RealType> MeshFunctionType;
+      // FIXME: clang does not like this (incomplete type error)
+//      typedef typename Functions::MeshFunction< Grid< 3, GridReal, Device, Index >,EntityDimension, RealType> MeshFunctionType;
       typedef typename Grid< 3, GridReal, Device, Index >::DistributedMeshType DistributedGridType; 
-      typedef typename MeshFunctionType::RealType Real;
       typedef typename DistributedGridType::CoordinatesType CoordinatesType;
-      template< typename Real_ >
-      using BufferEntitiesHelperType = BufferEntitiesHelper< MeshFunctionType, 3, Real_, Device >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
           
       DistributedMeshSynchronizer()
@@ -105,9 +103,12 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 3, GridReal, D
         
      }
         
-      template<typename CommunicatorType>
+      template< typename CommunicatorType,
+                typename MeshFunctionType,
+                typename PeriodicBoundariesMaskPointer = Pointers::SharedPointer< MeshFunctionType > >
       void synchronize( MeshFunctionType &meshFunction,
-                        bool periodicBoundaries = false )
+                        bool periodicBoundaries = false,
+                        const PeriodicBoundariesMaskPointer& mask = PeriodicBoundariesMaskPointer( nullptr ) )
       {
 
          TNL_ASSERT_TRUE( isSet, "Synchronizer is not set, but used to synchronize" );
@@ -121,7 +122,7 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 3, GridReal, D
         
          westSource   = lowerOverlap.x();
          eastSource   = localGridSize.x() - 2 * upperOverlap.x();
-         northSource   = lowerOverlap.y();
+         northSource  = lowerOverlap.y();
          southSource  = localGridSize.y() - 2 * upperOverlap.y();
          bottomSource = lowerOverlap.z();
          topSource    = localGridSize.z() - 2 * upperOverlap.z();
@@ -162,7 +163,8 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 3, GridReal, D
             xCenter, yCenter, zCenter,
             lowerOverlap, upperOverlap, localSize,
             neighbors,
-            periodicBoundaries );
+            periodicBoundaries,
+            PeriodicBoundariesMaskPointer( nullptr ) ); // the mask is used only when receiving data
         
          //async send and receive
          typename CommunicatorType::Request requests[52];
@@ -192,83 +194,122 @@ class DistributedMeshSynchronizer< Functions::MeshFunction< Grid< 3, GridReal, D
             xCenter, yCenter, zCenter,
             lowerOverlap, upperOverlap, localSize,
             neighbors,
-            periodicBoundaries );
+            periodicBoundaries,
+            mask );
     }
     
    private:
       
-      template< typename Real_ >
-      void copyBuffers( MeshFunctionType meshFunction, Containers::Array<Real_, Device, Index>* buffers, bool toBuffer,
-              int west, int east, int north, int south, int bottom, int top,
-              int xcenter, int ycenter, int zcenter,
-              const CoordinatesType& lowerOverlap,
-              const CoordinatesType& upperOverlap,
-              const CoordinatesType& localSize,
-              const int *neighbor,
-              bool periodicBoundaries )
+      template< typename Real_, 
+                typename MeshFunctionType,
+                typename PeriodicBoundariesMaskPointer >
+      void copyBuffers( 
+         MeshFunctionType& meshFunction,
+         Containers::Array<Real_, Device, Index>* buffers,
+         bool toBuffer,
+         int west, int east, int north, int south, int bottom, int top,
+         int xcenter, int ycenter, int zcenter,
+         const CoordinatesType& lowerOverlap,
+         const CoordinatesType& upperOverlap,
+         const CoordinatesType& localSize,
+         const int* neighbor,
+         bool periodicBoundaries,
+         const PeriodicBoundariesMaskPointer& mask )
       {
-         using Helper = BufferEntitiesHelper< MeshFunctionType, 3, Real_, Device >;
+         bool westIsBoundary = ( neighbor[ West ] == -1 );
+         bool eastIsBoundary = ( neighbor[ East ] == -1 );
+         bool northIsBoundary = ( neighbor[ North ] == -1 );
+         bool southIsBoundary = ( neighbor[ South ] == -1 );
+         bool bottomIsBoundary = ( neighbor[ Bottom ] == -1 );
+         bool topIsBoundary = ( neighbor[ Top ] == -1 );
+
+         bool northWestIsBoundary = ( neighbor[ NorthWest ] == -1 );
+         bool northEastIsBoundary = ( neighbor[ NorthEast ] == -1 );
+         bool southWestIsBoundary = ( neighbor[ SouthWest ] == -1 );
+         bool southEastIsBoundary = ( neighbor[ SouthEast ] == -1 );
+         
+         bool bottomWestIsBoundary = ( neighbor[ BottomWest ] == -1 );
+         bool bottomEastIsBoundary = ( neighbor[ BottomEast ] == -1 );
+         bool bottomNorthIsBoundary = ( neighbor[ BottomNorth ] == -1 );
+         bool bottomSouthIsBoundary = ( neighbor[ BottomSouth ] == -1 );
+
+         bool topWestIsBoundary = ( neighbor[ TopWest ] == -1 );
+         bool topEastIsBoundary = ( neighbor[ TopEast ] == -1 );
+         bool topNorthIsBoundary = ( neighbor[ TopNorth ] == -1 );
+         bool topSouthIsBoundary = ( neighbor[ TopSouth ] == -1 );
+
+         bool bottomNorthWestIsBoundary = ( neighbor[ BottomNorthWest ] == -1 );
+         bool bottomNorthEastIsBoundary = ( neighbor[ BottomNorthEast ] == -1 );
+         bool bottomSouthWestIsBoundary = ( neighbor[ BottomSouthWest ] == -1 );
+         bool bottomSouthEastIsBoundary = ( neighbor[ BottomSouthEast ] == -1 );
+
+         bool topNorthWestIsBoundary = ( neighbor[ TopNorthWest ] == -1 );
+         bool topNorthEastIsBoundary = ( neighbor[ TopNorthEast ] == -1 );
+         bool topSouthWestIsBoundary = ( neighbor[ TopSouthWest ] == -1 );
+         bool topSouthEastIsBoundary = ( neighbor[ TopSouthEast ] == -1 );
+         
+         using Helper = BufferEntitiesHelper< MeshFunctionType, PeriodicBoundariesMaskPointer, 3, Real_, Device >;
          //X-Y-Z
-         if( neighbor[ West ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ West ].getData(),   west,    ycenter, zcenter, lowerOverlap.x(), localSize.y(),     localSize.z(),    toBuffer );
-         if( neighbor[ East ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ East ].getData(),   east,    ycenter, zcenter, upperOverlap.x(), localSize.y(),     localSize.z(),    toBuffer );
-         if( neighbor[ North ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ North ].getData(),  xcenter, north,   zcenter, localSize.x(),    lowerOverlap.y(),  localSize.z(),    toBuffer );
-         if( neighbor[ South ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ South ].getData(),  xcenter, south,   zcenter, localSize.x(),     upperOverlap.y(), localSize.z(),    toBuffer );
-         if( neighbor[ Bottom ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Bottom ].getData(), xcenter, ycenter, bottom,  localSize.x(),     localSize.y(),    lowerOverlap.z(), toBuffer );
-         if( neighbor[ Top ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ Top ].getData(),    xcenter, ycenter, top,     localSize.x(),     localSize.y(),    upperOverlap.z(), toBuffer );	
+         if( ! westIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ West ].getData(),   westIsBoundary,   west,    ycenter, zcenter, lowerOverlap.x(), localSize.y(),     localSize.z(),    toBuffer );
+         if( ! eastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ East ].getData(),   eastIsBoundary,   east,    ycenter, zcenter, upperOverlap.x(), localSize.y(),     localSize.z(),    toBuffer );
+         if( ! northIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ North ].getData(),  northIsBoundary,  xcenter, north,   zcenter, localSize.x(),    lowerOverlap.y(),  localSize.z(),    toBuffer );
+         if( ! southIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ South ].getData(),  southIsBoundary,  xcenter, south,   zcenter, localSize.x(),     upperOverlap.y(), localSize.z(),    toBuffer );
+         if( ! bottomIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Bottom ].getData(), bottomIsBoundary, xcenter, ycenter, bottom,  localSize.x(),     localSize.y(),    lowerOverlap.z(), toBuffer );
+         if( ! topIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ Top ].getData(),    topIsBoundary,    xcenter, ycenter, top,     localSize.x(),     localSize.y(),    upperOverlap.z(), toBuffer );	
          
          //XY
-         if( neighbor[ NorthWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ NorthWest ].getData(), west, north, zcenter, lowerOverlap.x(), lowerOverlap.y(), localSize.z(), toBuffer );
-         if( neighbor[ NorthEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ NorthEast ].getData(), east, north, zcenter, upperOverlap.x(), lowerOverlap.y(), localSize.z(), toBuffer );
-         if( neighbor[ SouthWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ SouthWest ].getData(), west, south, zcenter, lowerOverlap.x(), upperOverlap.y(), localSize.z(), toBuffer );
-         if( neighbor[ SouthEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ SouthEast ].getData(), east, south, zcenter, upperOverlap.x(), upperOverlap.y(), localSize.z(), toBuffer );
+         if( ! northWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ NorthWest ].getData(), northWestIsBoundary, west, north, zcenter, lowerOverlap.x(), lowerOverlap.y(), localSize.z(), toBuffer );
+         if( ! northEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ NorthEast ].getData(), northEastIsBoundary, east, north, zcenter, upperOverlap.x(), lowerOverlap.y(), localSize.z(), toBuffer );
+         if( ! southWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ SouthWest ].getData(), southWestIsBoundary, west, south, zcenter, lowerOverlap.x(), upperOverlap.y(), localSize.z(), toBuffer );
+         if( ! southEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ SouthEast ].getData(), southEastIsBoundary, east, south, zcenter, upperOverlap.x(), upperOverlap.y(), localSize.z(), toBuffer );
          
          //XZ
-         if( neighbor[ BottomWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomWest ].getData(), west, ycenter, bottom, lowerOverlap.x(), localSize.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ BottomEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomEast ].getData(), east, ycenter, bottom, upperOverlap.x(), localSize.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ TopWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopWest ].getData(),    west, ycenter, top,    lowerOverlap.x(), localSize.y(), upperOverlap.z(), toBuffer );
-         if( neighbor[ TopEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopEast ].getData(),    east, ycenter, top,    upperOverlap.x(), localSize.y(), upperOverlap.z(), toBuffer );   
+         if( ! bottomWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomWest ].getData(), bottomWestIsBoundary, west, ycenter, bottom, lowerOverlap.x(), localSize.y(), lowerOverlap.z(), toBuffer );
+         if( ! bottomEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomEast ].getData(), bottomEastIsBoundary, east, ycenter, bottom, upperOverlap.x(), localSize.y(), lowerOverlap.z(), toBuffer );
+         if( ! topWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopWest ].getData(),    topWestIsBoundary,    west, ycenter, top,    lowerOverlap.x(), localSize.y(), upperOverlap.z(), toBuffer );
+         if( ! topEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopEast ].getData(),    topEastIsBoundary,    east, ycenter, top,    upperOverlap.x(), localSize.y(), upperOverlap.z(), toBuffer );   
          
          //YZ
-         if( neighbor[ BottomNorth ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomNorth ].getData(), xcenter, north, bottom, localSize.x(), lowerOverlap.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ BottomSouth ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomSouth ].getData(), xcenter, south, bottom, localSize.x(), upperOverlap.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ TopNorth ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopNorth ].getData(),    xcenter, north, top,    localSize.x(), lowerOverlap.y(), upperOverlap.z(), toBuffer );
-         if( neighbor[ TopSouth ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopSouth ].getData(),    xcenter, south, top,    localSize.x(), upperOverlap.y(), upperOverlap.z(), toBuffer );
+         if( ! bottomNorthIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomNorth ].getData(), bottomNorthIsBoundary, xcenter, north, bottom, localSize.x(), lowerOverlap.y(), lowerOverlap.z(), toBuffer );
+         if( ! bottomSouthIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomSouth ].getData(), bottomSouthIsBoundary, xcenter, south, bottom, localSize.x(), upperOverlap.y(), lowerOverlap.z(), toBuffer );
+         if( ! topNorthIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopNorth ].getData(),    topNorthIsBoundary,    xcenter, north, top,    localSize.x(), lowerOverlap.y(), upperOverlap.z(), toBuffer );
+         if( ! topSouthIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopSouth ].getData(),    topSouthIsBoundary,    xcenter, south, top,    localSize.x(), upperOverlap.y(), upperOverlap.z(), toBuffer );
          
          //XYZ
-         if( neighbor[ BottomNorthWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomNorthWest ].getData(), west, north, bottom, lowerOverlap.x(), lowerOverlap.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ BottomNorthEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomNorthEast ].getData(), east, north, bottom, upperOverlap.x(), lowerOverlap.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ BottomSouthWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomSouthWest ].getData(), west, south, bottom, lowerOverlap.x(), upperOverlap.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ BottomSouthEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ BottomSouthEast ].getData(), east, south, bottom, upperOverlap.x(), upperOverlap.y(), lowerOverlap.z(), toBuffer );
-         if( neighbor[ TopNorthWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopNorthWest ].getData(),    west, north, top,    lowerOverlap.x(), lowerOverlap.y(), upperOverlap.z(), toBuffer );
-         if( neighbor[ TopNorthEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopNorthEast ].getData(),    east, north, top,    upperOverlap.x(), lowerOverlap.y(), upperOverlap.z(), toBuffer );
-         if( neighbor[ TopSouthWest ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopSouthWest ].getData(),    west, south, top,    lowerOverlap.x(), upperOverlap.y(), upperOverlap.z(), toBuffer );
-         if( neighbor[ TopSouthEast ] != -1 || periodicBoundaries )
-            Helper::BufferEntities( meshFunction, buffers[ TopSouthEast ].getData(),    east, south, top,    upperOverlap.x(), upperOverlap.y(), upperOverlap.z(), toBuffer );   
+         if( ! bottomNorthWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomNorthWest ].getData(), bottomNorthWestIsBoundary, west, north, bottom, lowerOverlap.x(), lowerOverlap.y(), lowerOverlap.z(), toBuffer );
+         if( ! bottomNorthEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomNorthEast ].getData(), bottomNorthEastIsBoundary, east, north, bottom, upperOverlap.x(), lowerOverlap.y(), lowerOverlap.z(), toBuffer );
+         if( ! bottomSouthWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomSouthWest ].getData(), bottomSouthWestIsBoundary, west, south, bottom, lowerOverlap.x(), upperOverlap.y(), lowerOverlap.z(), toBuffer );
+         if( ! bottomSouthEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ BottomSouthEast ].getData(), bottomSouthEastIsBoundary, east, south, bottom, upperOverlap.x(), upperOverlap.y(), lowerOverlap.z(), toBuffer );
+         if( ! topNorthWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopNorthWest ].getData(),    topNorthWestIsBoundary,    west, north, top,    lowerOverlap.x(), lowerOverlap.y(), upperOverlap.z(), toBuffer );
+         if( ! topNorthEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopNorthEast ].getData(),    topNorthEastIsBoundary,    east, north, top,    upperOverlap.x(), lowerOverlap.y(), upperOverlap.z(), toBuffer );
+         if( ! topSouthWestIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopSouthWest ].getData(),    topSouthWestIsBoundary,    west, south, top,    lowerOverlap.x(), upperOverlap.y(), upperOverlap.z(), toBuffer );
+         if( ! topSouthEastIsBoundary || periodicBoundaries )
+            Helper::BufferEntities( meshFunction, mask, buffers[ TopSouthEast ].getData(),    topSouthEastIsBoundary,    east, south, top,    upperOverlap.x(), upperOverlap.y(), upperOverlap.z(), toBuffer );   
       }
     
    private:
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedMesh.h b/src/TNL/Meshes/DistributedMeshes/DistributedMesh.h
index 3043c859863ff2b917677b3cb51929414fcab7e1..c029cdc50fef8ac03b55ddf559be547421d3fb53 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedMesh.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedMesh.h
@@ -10,6 +10,8 @@
 
 #pragma once
 
+#include <TNL/Containers/StaticVector.h>
+
 namespace TNL {
 namespace Meshes { 
 namespace DistributedMeshes {
@@ -18,10 +20,13 @@ template< typename MeshType >
 class DistributedMesh
 {
 public:
-    bool IsDistributed(void)
-    {
-        return false;
-    };
+   // FIXME: this is not going to work
+   using SubdomainOverlapsType = Containers::StaticVector< MeshType::getMeshDimension(), typename MeshType::GlobalIndexType >;
+
+   bool IsDistributed(void)
+   {
+      return false;
+   };
 };
 
 } // namespace DistributedMeshes
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h b/src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h
index 1cfad10f6051a506985d2d419f6f4245321e88cd..03fff7fa190238a8167ebc169f089271d998888b 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedMeshSynchronizer.h
@@ -19,12 +19,14 @@ class DistributedMeshSynchronizer
 {
    public:
    
-      typedef typename MeshFunctionType::DistributedMeshType DistributedMeshType;
+      // FIXME: clang does not like this (incomplete type error)
+//      typedef typename MeshFunctionType::DistributedMeshType DistributedMeshType;
       
+      template< typename DistributedMeshType >
       void setDistributedGrid( DistributedMeshType *distributedGrid )
       {
          TNL_ASSERT_TRUE( false, "Distribution of this type of mesh is NOT implemented" );
-      }; 
+      } 
 
 };
 
diff --git a/src/TNL/Meshes/GridDetails/CMakeLists.txt b/src/TNL/Meshes/GridDetails/CMakeLists.txt
index 4c3b51e9930d815147508d1618de6a9d6388eeb4..0da067f142c99ed0db8435a2c3818133d007026f 100644
--- a/src/TNL/Meshes/GridDetails/CMakeLists.txt
+++ b/src/TNL/Meshes/GridDetails/CMakeLists.txt
@@ -27,9 +27,4 @@ SET( headers BoundaryGridEntityChecker.h
              Traverser_Grid3D.h
              Traverser_Grid3D_impl.h )
 
-SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Meshes/GridDetails )    
-SET( tnl_mesh_griddetails_SOURCES
-     ${CURRENT_DIR}/Grid_impl.cpp
-     PARENT_SCOPE )
-
 INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Meshes/GridDetails )
diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h
index 73a2ebfb9a7fd3be809e6ec53b2170471226f469..bf7e4fa29b2e5e3b139867619cf7322c5b10e79d 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D.h
@@ -99,11 +99,9 @@ class Grid< 1, Real, Device, Index > : public Object
    inline Index getEntityIndex( const Entity& entity ) const;
 
    __cuda_callable__
-   
-   inline void setSpaceSteps(const PointType& steps);
-
    inline const PointType& getSpaceSteps() const;
 
+   inline void setSpaceSteps(const PointType& steps);
 
    template< int xPow >
    __cuda_callable__
diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h
index c6181a63bfc82c1fdcb43d944572bff815dbe9c5..b6d4015ac4c7217f983a58a1c647aecfc28c14ec 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D.h
@@ -99,7 +99,6 @@ class Grid< 2, Real, Device, Index > : public Object
    inline Index getEntityIndex( const Entity& entity ) const;
 
    __cuda_callable__
-
    inline const PointType& getSpaceSteps() const;
 
    inline void setSpaceSteps(const PointType& steps);
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser.h b/src/TNL/Meshes/GridDetails/GridTraverser.h
index 7e821d714bcdb3b6081ac59ae160ab898434979b..3a74c085bcd24b9935cc6750cc2da3587a795a15 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser.h
+++ b/src/TNL/Meshes/GridDetails/GridTraverser.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Grid.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/CudaStreamPool.h>
 
 namespace TNL {
@@ -35,7 +35,7 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Host, Index > >
    public:
       
       typedef Meshes::Grid< 1, Real, Devices::Host, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::Host DeviceType;
       typedef Index IndexType;
@@ -51,7 +51,7 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Host, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType begin,
          const CoordinatesType end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          const int& stream = 0 );
 };
 
@@ -65,7 +65,7 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Cuda, Index > >
    public:
       
       typedef Meshes::Grid< 1, Real, Devices::Cuda, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::Cuda DeviceType;
       typedef Index IndexType;
@@ -81,7 +81,7 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Cuda, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType& begin,
          const CoordinatesType& end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          const int& stream = 0 );
 };
 
@@ -95,7 +95,7 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >
    public:
       
       typedef Meshes::Grid< 1, Real, Devices::MIC, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::MIC DeviceType;
       typedef Index IndexType;
@@ -111,7 +111,7 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType& begin,
          const CoordinatesType& end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          const int& stream = 0 );
 };
 
@@ -127,7 +127,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index > >
    public:
       
       typedef Meshes::Grid< 2, Real, Devices::Host, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::Host DeviceType;
       typedef Index IndexType;
@@ -146,7 +146,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType begin,
          const CoordinatesType end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
 //         const int& stream = 0,
          const int& stream,
@@ -165,7 +165,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > >
    public:
       
       typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::Cuda DeviceType;
       typedef Index IndexType;
@@ -184,7 +184,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType& begin,
          const CoordinatesType& end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
 //         const int& stream = 0,
          const int& stream,
@@ -203,7 +203,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >
    public:
       
       typedef Meshes::Grid< 2, Real, Devices::MIC, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::MIC DeviceType;
       typedef Index IndexType;
@@ -222,7 +222,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType& begin,
          const CoordinatesType& end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
 //         const int& stream = 0,
          const int& stream,
@@ -241,7 +241,7 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Host, Index > >
    public:
       
       typedef Meshes::Grid< 3, Real, Devices::Host, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::Host DeviceType;
       typedef Index IndexType;
@@ -261,7 +261,7 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Host, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType begin,
          const CoordinatesType end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
 //         const int& stream = 0,
          const int& stream,
@@ -280,7 +280,7 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Cuda, Index > >
    public:
       
       typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::Cuda DeviceType;
       typedef Index IndexType;
@@ -300,7 +300,7 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Cuda, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType& begin,
          const CoordinatesType& end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
 //         const int& stream = 0,
          const int& stream,
@@ -319,7 +319,7 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >
    public:
       
       typedef Meshes::Grid< 3, Real, Devices::MIC, Index > GridType;
-      typedef SharedPointer< GridType > GridPointer;
+      typedef Pointers::SharedPointer<  GridType > GridPointer;
       typedef Real RealType;
       typedef Devices::MIC DeviceType;
       typedef Index IndexType;
@@ -339,7 +339,7 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >
          const GridPointer& gridPointer,
          const CoordinatesType& begin,
          const CoordinatesType& end,
-         SharedPointer< UserData, DeviceType >& userData,
+         UserData& userData,
          // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
 //         const int& stream = 0,
          const int& stream,
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_impl.h b/src/TNL/Meshes/GridDetails/GridTraverser_impl.h
index 3b232f6930bf54ea980dfc0c95b38382a6433019..258325a768cde7c37fdecedd34829c07a1374bc8 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridTraverser_impl.h
@@ -12,6 +12,8 @@
 
 #pragma once
 
+//#define GRID_TRAVERSER_USE_STREAMS
+
 #include "GridTraverser.h"
 
 #include <TNL/Exceptions/CudaSupportMissing.h>
@@ -35,7 +37,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType begin,
    const CoordinatesType end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream )
 {
    GridEntity entity( *gridPointer );
@@ -45,10 +47,10 @@ processEntities(
 
       entity.getCoordinates() = begin;
       entity.refresh();
-      EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+      EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
       entity.getCoordinates() = end;
       entity.refresh();
-      EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+      EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
    }
    else
    {
@@ -59,7 +61,7 @@ processEntities(
            entity.getCoordinates().x() ++ )
       {
          entity.refresh();
-         EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+         EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
       }*/ 
 #ifdef HAVE_OPENMP
 #pragma omp parallel firstprivate( begin, end ) if( Devices::Host::isOMPEnabled() )
@@ -73,7 +75,7 @@ processEntities(
          {
             entity.getCoordinates().x() = x;
             entity.refresh();
-            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
          }      
       }
       
@@ -92,7 +94,7 @@ template< typename Real,
 __global__ void
 GridTraverser1D(
    const Meshes::Grid< 1, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const typename GridEntity::CoordinatesType begin,
    const typename GridEntity::CoordinatesType end,
    const Index gridIdx )
@@ -107,7 +109,7 @@ GridTraverser1D(
    {   
       GridEntity entity( *grid, coordinates );
       entity.refresh();
-      EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity );
+      EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
    }
 }
 
@@ -119,7 +121,7 @@ template< typename Real,
 __global__ void
 GridBoundaryTraverser1D(
    const Meshes::Grid< 1, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const typename GridEntity::CoordinatesType begin,
    const typename GridEntity::CoordinatesType end )
 {
@@ -133,14 +135,14 @@ GridBoundaryTraverser1D(
       coordinates.x() = begin.x();
       GridEntity entity( *grid, coordinates );
       entity.refresh();
-      EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity );
+      EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
    }
    if( threadIdx.x == 1 )
    {
       coordinates.x() = end.x();
       GridEntity entity( *grid, coordinates );
       entity.refresh();
-      EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity );
+      EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
    }
 }
 
@@ -159,7 +161,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType& begin,
    const CoordinatesType& end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream )
 {
 #ifdef HAVE_CUDA
@@ -174,7 +176,7 @@ processEntities(
       GridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
             <<< cudaBlocks, cudaBlockSize, 0, s >>>
             ( &gridPointer.template getData< Devices::Cuda >(),
-              &userDataPointer.template modifyData< Devices::Cuda >(),
+              userData,
               begin,
               end );
    }
@@ -189,7 +191,7 @@ processEntities(
          GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
             <<< cudaBlocks, cudaBlockSize, 0, s >>>
             ( &gridPointer.template getData< Devices::Cuda >(),
-              &userDataPointer.template modifyData< Devices::Cuda >(),
+              userData,
               begin,
               end,
               gridXIdx );
@@ -223,7 +225,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType& begin,
    const CoordinatesType& end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream )
 {
     std::cout << "Not Implemented yet Grid Traverser <1, Real, Device::MIC>" << std::endl;
@@ -239,7 +241,7 @@ processEntities(
       GridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
             <<< cudaBlocks, cudaBlockSize, 0, s >>>
             ( &gridPointer.template getData< Devices::Cuda >(),
-              &userDataPointer.template modifyData< Devices::Cuda >(),
+              userData,
               begin,
               end );
    }
@@ -254,7 +256,7 @@ processEntities(
          GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
             <<< cudaBlocks, cudaBlockSize, 0, s >>>
             ( &gridPointer.template getData< Devices::Cuda >(),
-              &userDataPointer.template modifyData< Devices::Cuda >(),
+              userData,
               begin,
               end,
               gridXIdx );
@@ -288,7 +290,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType begin,
    const CoordinatesType end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream,
    const GridEntityParameters&... gridEntityParameters )
 {
@@ -303,10 +305,10 @@ processEntities(
          {
             entity.getCoordinates().y() = begin.y();
             entity.refresh();
-            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             entity.getCoordinates().y() = end.y();
             entity.refresh();
-            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
          }
       if( XOrthogonalBoundary )
          for( entity.getCoordinates().y() = begin.y();
@@ -315,10 +317,10 @@ processEntities(
          {
             entity.getCoordinates().x() = begin.x();
             entity.refresh();
-            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             entity.getCoordinates().x() = end.x();
             entity.refresh();
-            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
          }
    }
    else
@@ -333,7 +335,7 @@ processEntities(
               entity.getCoordinates().x() ++ )
          {
             entity.refresh();
-            EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+            EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
          }*/
 #ifdef HAVE_OPENMP
 #pragma omp parallel firstprivate( begin, end ) if( Devices::Host::isOMPEnabled() )
@@ -349,7 +351,7 @@ processEntities(
                entity.getCoordinates().x() = x;
                entity.getCoordinates().y() = y;
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             }      
       }
    }
@@ -358,7 +360,7 @@ processEntities(
 /****
  * 2D traverser, CUDA
  */
-#ifdef HAVE_CUDA
+#ifdef HAVE_CUDA 
 template< typename Real,
           typename Index,
           typename GridEntity,
@@ -369,7 +371,7 @@ template< typename Real,
 __global__ void 
 GridTraverser2D(
    const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const typename GridEntity::CoordinatesType begin,
    const typename GridEntity::CoordinatesType end,
    const dim3 gridIdx,
@@ -389,12 +391,13 @@ GridTraverser2D(
       {
          EntitiesProcessor::processEntity
          ( *grid,
-           *userData,
+           userData,
            entity );
       }
    }
 }
 
+// Boundary traverser using streams
 template< typename Real,
           typename Index,
           typename GridEntity,
@@ -405,7 +408,7 @@ template< typename Real,
 __global__ void 
 GridTraverser2DBoundaryAlongX(
    const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const Index beginX,
    const Index endX,
    const Index fixedY,
@@ -424,11 +427,12 @@ GridTraverser2DBoundaryAlongX(
       entity.refresh();
       EntitiesProcessor::processEntity
       ( *grid,
-        *userData,
+        userData,
         entity );
    }   
 }
 
+// Boundary traverser using streams
 template< typename Real,
           typename Index,
           typename GridEntity,
@@ -439,7 +443,7 @@ template< typename Real,
 __global__ void 
 GridTraverser2DBoundaryAlongY(
    const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const Index beginY,
    const Index endY,
    const Index fixedX,
@@ -458,12 +462,160 @@ GridTraverser2DBoundaryAlongY(
       entity.refresh();
       EntitiesProcessor::processEntity
       ( *grid,
-        *userData,
+        userData,
         entity );
    }   
 }
 
-#endif
+// Boundary traverser without streams: one kernel handles all four faces of the 2D grid; blocks are assigned to faces via blocksPerFace.
+template< typename Real,
+          typename Index,
+          typename GridEntity,
+          typename UserData,
+          typename EntitiesProcessor,
+          bool processOnlyBoundaryEntities,
+          typename... GridEntityParameters >
+__global__ void 
+GridTraverser2DBoundary(
+   const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
+   UserData userData,
+   const Index beginX,
+   const Index endX,
+   const Index beginY,
+   const Index endY,
+   const Index blocksPerFace,
+   const dim3 gridIdx,
+   const GridEntityParameters... gridEntityParameters )
+{
+   using GridType = Meshes::Grid< 2, Real, Devices::Cuda, Index >;
+   using CoordinatesType = typename GridType::CoordinatesType;
+   
+   const Index faceIdx = blockIdx.x / blocksPerFace; // which face this block serves
+   const Index faceBlockIdx = blockIdx.x % blocksPerFace; // block index within that face
+   const Index threadId = faceBlockIdx * blockDim. x + threadIdx.x; // entity offset along the face
+   if( faceIdx < 2 ) // faces 0 and 1 run along X at y = beginY and y = endY, corners included
+   {
+      const Index entitiesAlongX = endX - beginX + 1;
+      if( threadId < entitiesAlongX )
+      {
+         GridEntity entity( *grid, 
+            CoordinatesType(  beginX + threadId, faceIdx == 0 ? beginY : endY ),
+            gridEntityParameters... );
+         //printf( "faceIdx %d Thread %d -> %d %d \n ", faceIdx, threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+         entity.refresh();
+         EntitiesProcessor::processEntity( *grid, userData, entity );
+      }
+   }
+   else // remaining faces run along Y at x = beginX (face 2) or x = endX, skipping the corner rows
+   {
+      const Index entitiesAlongY = endY - beginY - 1;   
+      if( threadId < entitiesAlongY )
+      {
+         GridEntity entity( *grid, 
+            CoordinatesType(  faceIdx == 2 ? beginX : endX, beginY + threadId + 1  ),
+            gridEntityParameters... );
+         //printf( "faceIdx %d Thread %d -> %d %d \n ", faceIdx, threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+         entity.refresh();
+         EntitiesProcessor::processEntity( *grid, userData, entity );
+      }
+   }
+   // NOTE(review): gridIdx is not read by the live code above; face and thread indices
+   // are derived from blockIdx.x only. If the host ever splits the launch into several
+   // grids, each launch would presumably repeat the same entities - TODO confirm.
+   /*const Index aux = max( entitiesAlongX, entitiesAlongY );
+   const Index& warpSize = Devices::Cuda::getWarpSize();
+   const Index threadsPerAxis = warpSize * ( aux / warpSize + ( aux % warpSize != 0 ) );
+   
+   Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx );
+   GridEntity entity( *grid, 
+         CoordinatesType( 0, 0 ),
+         gridEntityParameters... );
+   CoordinatesType& coordinates = entity.getCoordinates();
+   const Index axisIndex = threadId / threadsPerAxis;
+   //printf( "axisIndex %d, threadId %d thradsPerAxis %d \n", axisIndex, threadId, threadsPerAxis );   
+   threadId -= axisIndex * threadsPerAxis;
+   switch( axisIndex )
+   {
+      case 1:
+         coordinates = CoordinatesType( beginX + threadId, beginY );
+         if( threadId < entitiesAlongX )
+         {
+            //printf( "X1: Thread %d -> %d %d \n ", threadId, coordinates.x(), coordinates.y() );
+            entity.refresh();
+            EntitiesProcessor::processEntity( *grid, userData, entity );
+         }
+         break;
+      case 2:
+         coordinates = CoordinatesType( beginX + threadId, endY );
+         if( threadId < entitiesAlongX )
+         {
+            //printf( "X2: Thread %d -> %d %d \n ", threadId, coordinates.x(), coordinates.y() );
+            entity.refresh();
+            EntitiesProcessor::processEntity( *grid, userData, entity );
+         }
+         break;
+      case 3:
+         coordinates = CoordinatesType( beginX, beginY + threadId + 1 );
+         if( threadId < entitiesAlongY )
+         {
+            //printf( "Y1: Thread %d -> %d %d \n ", threadId, coordinates.x(), coordinates.y() );
+            entity.refresh();
+            EntitiesProcessor::processEntity( *grid, userData, entity );
+         }
+         break;
+      case 4:
+         coordinates = CoordinatesType( endX, beginY + threadId + 1 );
+         if( threadId < entitiesAlongY )
+         {
+            //printf( "Y2: Thread %d -> %d %d \n ", threadId, coordinates.x(), coordinates.y() );
+            entity.refresh();
+            EntitiesProcessor::processEntity( *grid, userData, entity );
+         }
+         break;
+   }*/
+   
+   /*if( threadId < entitiesAlongX )
+   {
+      GridEntity entity( *grid, 
+         CoordinatesType( beginX + threadId, beginY ),
+         gridEntityParameters... );
+      //printf( "X1: Thread %d -> %d %d x %d %d \n ", threadId, 
+      //   entity.getCoordinates().x(), entity.getCoordinates().y(),
+      //   grid->getDimensions().x(), grid->getDimensions().y() );
+      entity.refresh();
+      EntitiesProcessor::processEntity( *grid, userData, entity );
+   }
+   else if( ( threadId -= entitiesAlongX ) < entitiesAlongX && threadId >= 0 )
+   {
+      GridEntity entity( *grid, 
+         CoordinatesType( beginX + threadId, endY ),
+         gridEntityParameters... );
+      entity.refresh();
+      //printf( "X2: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+      EntitiesProcessor::processEntity( *grid, userData, entity );
+   }
+   else if( ( ( threadId -= entitiesAlongX ) < entitiesAlongY - 1 ) && threadId >= 0 )
+   {
+      GridEntity entity( *grid,
+         CoordinatesType( beginX, beginY + threadId + 1 ),
+      gridEntityParameters... );
+      entity.refresh();
+      //printf( "Y1: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+      EntitiesProcessor::processEntity( *grid, userData, entity );      
+   }
+   else if( ( ( threadId -= entitiesAlongY - 1 ) < entitiesAlongY - 1  ) && threadId >= 0 )
+   {
+      GridEntity entity( *grid,
+         CoordinatesType( endX, beginY + threadId + 1 ),
+      gridEntityParameters... );
+      entity.refresh();
+      //printf( "Y2: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() );
+      EntitiesProcessor::processEntity( *grid, userData, entity );
+   }*/
+}
+
+
+#endif // HAVE_CUDA
+
+
+#endif // HAVE_CUDA
 
 template< typename Real,
           typename Index >
@@ -481,7 +633,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType& begin,
    const CoordinatesType& end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream,
    const GridEntityParameters&... gridEntityParameters )
 {
@@ -489,6 +641,7 @@ processEntities(
    if( processOnlyBoundaryEntities && 
        ( GridEntity::getEntityDimension() == 2 || GridEntity::getEntityDimension() == 0 ) )
    {
+#ifdef GRID_TRAVERSER_USE_STREAMS            
       dim3 cudaBlockSize( 256 );
       dim3 cudaBlocksCountAlongX, cudaGridsCountAlongX,
            cudaBlocksCountAlongY, cudaGridsCountAlongY;
@@ -508,7 +661,7 @@ processEntities(
          GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s1 >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
-                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 userData,
                  begin.x(),
                  end.x(),
                  begin.y(),
@@ -517,7 +670,7 @@ processEntities(
          GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s2 >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
-                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 userData,
                  begin.x(),
                  end.x(),
                  end.y(),
@@ -532,7 +685,7 @@ processEntities(
          GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s3 >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
-                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 userData,
                  begin.y() + 1,
                  end.y() - 1,
                  begin.x(),
@@ -541,7 +694,7 @@ processEntities(
          GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s4 >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
-                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 userData,
                  begin.y() + 1,
                  end.y() - 1,
                  end.x(),
@@ -552,7 +705,38 @@ processEntities(
       cudaStreamSynchronize( s2 );
       cudaStreamSynchronize( s3 );
       cudaStreamSynchronize( s4 );
-      TNL_CHECK_CUDA_DEVICE;
+#else // not defined GRID_TRAVERSER_USE_STREAMS: all four boundary faces are handled by one kernel
+      dim3 cudaBlockSize( 256 );      
+      dim3 cudaBlocksCount, cudaGridsCount;
+      const IndexType entitiesAlongX = end.x() - begin.x() + 1; // X faces include both corners
+      const IndexType entitiesAlongY = end.y() - begin.y() - 1; // Y faces skip the corners; must use the y extent, as the kernel does
+      const IndexType maxFaceSize = max( entitiesAlongX, entitiesAlongY );
+      const IndexType blocksPerFace = maxFaceSize / cudaBlockSize.x + ( maxFaceSize % cudaBlockSize.x != 0 ); // rounded up so the longest face is fully covered
+      IndexType cudaThreadsCount = 4 * cudaBlockSize.x * blocksPerFace; // four faces, blocksPerFace blocks each
+      Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount );
+      //std::cerr << "blocksPerFace = " << blocksPerFace << "Threads count = " << cudaThreadsCount 
+      //          << "cudaBlockCount = " << cudaBlocksCount.x << std::endl;      
+      dim3 gridIdx, cudaGridSize;
+      Devices::Cuda::synchronizeDevice();
+      for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ )
+      {
+         Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
+         //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX );
+         GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
+               <<< cudaGridSize, cudaBlockSize >>>
+               ( &gridPointer.template getData< Devices::Cuda >(),
+                 userData,
+                 begin.x(),
+                 end.x(),
+                 begin.y(),
+                 end.y(),
+                 blocksPerFace,
+                 gridIdx,
+                 gridEntityParameters... );
+      }
+#endif //GRID_TRAVERSER_USE_STREAMS
+      //getchar();      
+      TNL_CHECK_CUDA_DEVICE;      
    }
    else
    {
@@ -575,7 +759,7 @@ processEntities(
             GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaGridSize, cudaBlockSize, 0, s >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
-                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 userData,
                  begin,
                  end,
                  gridIdx,
@@ -594,6 +778,7 @@ processEntities(
 #endif
 }
 
+
 /****
  * 2D traverser, MIC
  */
@@ -613,7 +798,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType& begin,
    const CoordinatesType& end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream,
    const GridEntityParameters&... gridEntityParameters )
 {
@@ -712,7 +897,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType begin,
    const CoordinatesType end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream,
    const GridEntityParameters&... gridEntityParameters )
 {
@@ -730,10 +915,10 @@ processEntities(
             {
                entity.getCoordinates().z() = begin.z();
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
                entity.getCoordinates().z() = end.z();
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             }
       if( YOrthogonalBoundary )
          for( entity.getCoordinates().z() = begin.z();
@@ -745,10 +930,10 @@ processEntities(
             {
                entity.getCoordinates().y() = begin.y();
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
                entity.getCoordinates().y() = end.y();
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             }
       if( XOrthogonalBoundary )
          for( entity.getCoordinates().z() = begin.z();
@@ -760,10 +945,10 @@ processEntities(
             {
                entity.getCoordinates().x() = begin.x();
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
                entity.getCoordinates().x() = end.x();
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             }
    }
    else
@@ -781,7 +966,7 @@ processEntities(
                  entity.getCoordinates().x() ++ )
             {
                entity.refresh();
-               EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+               EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             }*/
 #ifdef HAVE_OPENMP
 #pragma omp parallel firstprivate( begin, end ) if( Devices::Host::isOMPEnabled() )
@@ -799,7 +984,7 @@ processEntities(
                   entity.getCoordinates().y() = y;
                   entity.getCoordinates().z() = z;
                   entity.refresh();
-                  EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity );
+                  EntitiesProcessor::processEntity( entity.getMesh(), userData, entity );
             }
       }      
    }
@@ -819,7 +1004,7 @@ template< typename Real,
 __global__ void
 GridTraverser3D(
    const Meshes::Grid< 3, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const typename GridEntity::CoordinatesType begin,
    const typename GridEntity::CoordinatesType end,
    const dim3 gridIdx,
@@ -840,7 +1025,7 @@ GridTraverser3D(
       {
          EntitiesProcessor::processEntity
          ( *grid,
-           *userData,
+           userData,
            entity );
       }
    }
@@ -856,7 +1041,7 @@ template< typename Real,
 __global__ void 
 GridTraverser3DBoundaryAlongXY(
    const Meshes::Grid< 3, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const Index beginX,
    const Index endX,
    const Index beginY,
@@ -878,7 +1063,7 @@ GridTraverser3DBoundaryAlongXY(
       entity.refresh();
       EntitiesProcessor::processEntity
       ( *grid,
-        *userData,
+        userData,
         entity );
    }
 }
@@ -893,7 +1078,7 @@ template< typename Real,
 __global__ void 
 GridTraverser3DBoundaryAlongXZ(
    const Meshes::Grid< 3, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const Index beginX,
    const Index endX,
    const Index beginZ,
@@ -915,7 +1100,7 @@ GridTraverser3DBoundaryAlongXZ(
       entity.refresh();
       EntitiesProcessor::processEntity
       ( *grid,
-        *userData,
+        userData,
         entity );
    }   
 }
@@ -930,7 +1115,7 @@ template< typename Real,
 __global__ void 
 GridTraverser3DBoundaryAlongYZ(
    const Meshes::Grid< 3, Real, Devices::Cuda, Index >* grid,
-   UserData* userData,
+   UserData userData,
    const Index beginY,
    const Index endY,
    const Index beginZ,
@@ -952,7 +1137,7 @@ GridTraverser3DBoundaryAlongYZ(
       entity.refresh();
       EntitiesProcessor::processEntity
       ( *grid,
-        *userData,
+        userData,
         entity );
    }   
 }
@@ -975,7 +1160,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType& begin,
    const CoordinatesType& end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream,
    const GridEntityParameters&... gridEntityParameters )
 {
@@ -1013,7 +1198,7 @@ processEntities(
             GridTraverser3DBoundaryAlongXY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongXY, cudaBlockSize, 0 , s1 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin.x(),
                     end.x(),
                     begin.y(),
@@ -1024,7 +1209,7 @@ processEntities(
             GridTraverser3DBoundaryAlongXY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongXY, cudaBlockSize, 0, s2 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin.x(),
                     end.x(),
                     begin.y(),
@@ -1040,7 +1225,7 @@ processEntities(
             GridTraverser3DBoundaryAlongXZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongXZ, cudaBlockSize, 0, s3 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin.x(),
                     end.x(),               
                     begin.z() + 1,
@@ -1051,7 +1236,7 @@ processEntities(
             GridTraverser3DBoundaryAlongXZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongXZ, cudaBlockSize, 0, s4 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin.x(),
                     end.x(),               
                     begin.z() + 1,
@@ -1067,7 +1252,7 @@ processEntities(
             GridTraverser3DBoundaryAlongYZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongYZ, cudaBlockSize, 0, s5 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin.y() + 1,
                     end.y() - 1,               
                     begin.z() + 1,
@@ -1078,7 +1263,7 @@ processEntities(
             GridTraverser3DBoundaryAlongYZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< cudaBlocksCountAlongYZ, cudaBlockSize, 0, s6 >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin.y() + 1,
                     end.y() - 1,               
                     begin.z() + 1,
@@ -1118,7 +1303,7 @@ processEntities(
                GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                   <<< gridSize, cudaBlockSize, 0, s >>>
                   ( &gridPointer.template getData< Devices::Cuda >(),
-                    &userDataPointer.template modifyData< Devices::Cuda >(),
+                    userData,
                     begin,
                     end,
                     gridIdx,
@@ -1157,7 +1342,7 @@ processEntities(
    const GridPointer& gridPointer,
    const CoordinatesType& begin,
    const CoordinatesType& end,
-   SharedPointer< UserData, DeviceType >& userDataPointer,
+   UserData& userData,
    const int& stream,
    const GridEntityParameters&... gridEntityParameters )
 {
@@ -1183,7 +1368,7 @@ processEntities(
             GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
                <<< cudaBlocks, cudaBlockSize, 0, s >>>
                ( &gridPointer.template getData< Devices::Cuda >(),
-                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 userData,
                  begin,
                  end,
                  gridXIdx,
diff --git a/src/TNL/Meshes/GridDetails/Grid_impl.cpp b/src/TNL/Meshes/GridDetails/Grid_impl.cpp
deleted file mode 100644
index 5073e0c1078aa1ca6f6240d0c7948b53e818f6ed..0000000000000000000000000000000000000000
--- a/src/TNL/Meshes/GridDetails/Grid_impl.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/***************************************************************************
-                          Grid_impl.cpp  -  description
-                             -------------------
-    begin                : Jan 21, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Meshes/Grid.h>
-
-namespace TNL {
-namespace Meshes{
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template class Grid< 1, float,  Devices::Host, int >;
-template class Grid< 1, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class Grid< 1, float,  Devices::Host, long int >;
-template class Grid< 1, double, Devices::Host, long int >;
-#endif
-
-template class Grid< 2, float,  Devices::Host, int >;
-template class Grid< 2, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class Grid< 2, float,  Devices::Host, long int >;
-template class Grid< 2, double, Devices::Host, long int >;
-#endif
-
-template class Grid< 3, float,  Devices::Host, int >;
-template class Grid< 3, double, Devices::Host, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class Grid< 3, float,  Devices::Host, long int >;
-template class Grid< 3, double, Devices::Host, long int >;
-#endif
-
-#ifdef HAVE_CUDA
-#endif
-
-template class Grid< 1, float,  Devices::Cuda, int >;
-template class Grid< 1, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class Grid< 1, float,  Devices::Cuda, long int >;
-template class Grid< 1, double, Devices::Cuda, long int >;
-#endif
-
-template class Grid< 2, float,  Devices::Cuda, int >;
-template class Grid< 2, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class Grid< 2, float,  Devices::Cuda, long int >;
-template class Grid< 2, double, Devices::Cuda, long int >;
-#endif
-
-template class Grid< 3, float,  Devices::Cuda, int >;
-template class Grid< 3, double, Devices::Cuda, int >;
-#ifdef INSTANTIATE_LONG_INT
-template class Grid< 3, float,  Devices::Cuda, long int >;
-template class Grid< 3, double, Devices::Cuda, long int >;
-#endif
-
-#endif
-
-} // namespace Meshes
-} // namespace TNL
-
-
-
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
index 2bef7d8bdffbe4f797367af3ffb49af01fb47970..dd9562add377b02c3ab9ca91fa4804762182b46c 100644
--- a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
@@ -22,12 +22,7 @@ template< typename GridEntity,
           int NeighborEntityDimension,
           typename GridEntityConfig,
           bool storage = GridEntityConfig::template neighborEntityStorage< GridEntity >( NeighborEntityDimension ) >
-class NeighborGridEntityLayer{};   
-   
-template< typename GridEntity,
-          int NeighborEntityDimension,
-          typename GridEntityConfig >
-class NeighborGridEntityLayer< GridEntity, NeighborEntityDimension, GridEntityConfig, true >
+class NeighborGridEntityLayer
 : public NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig >
 {
    public:
@@ -63,8 +58,9 @@ class NeighborGridEntityLayer< GridEntity, NeighborEntityDimension, GridEntityCo
 };
 
 template< typename GridEntity,
-          typename GridEntityConfig >
-class NeighborGridEntityLayer< GridEntity, 0, GridEntityConfig, true >
+          typename GridEntityConfig,
+          bool storage >
+class NeighborGridEntityLayer< GridEntity, 0, GridEntityConfig, storage >
 {
    public:
  
@@ -93,52 +89,6 @@ class NeighborGridEntityLayer< GridEntity, 0, GridEntityConfig, true >
       NeighborEntityGetterType neighborEntities;
 };
 
-template< typename GridEntity,
-          int NeighborEntityDimension,
-          typename GridEntityConfig >
-class NeighborGridEntityLayer< GridEntity, NeighborEntityDimension, GridEntityConfig, false >
-: public NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig >
-{
-   public:
-      
-      typedef NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig > BaseType;      
-      typedef NeighborGridEntityGetter< GridEntity, NeighborEntityDimension > NeighborEntityGetterType;
-
-      using BaseType::getNeighborEntities;
- 
-      __cuda_callable__
-      NeighborGridEntityLayer( const GridEntity& entity )
-      : BaseType( entity )
-      {}
-
-      __cuda_callable__
-      const NeighborEntityGetterType& getNeighborEntities( const DimensionTag< NeighborEntityDimension >& tag ) const {}
- 
-      __cuda_callable__
-      void refresh( const typename GridEntity::GridType& grid,
-                    const typename GridEntity::GridType::IndexType& entityIndex ) {}
-};
-
-template< typename GridEntity,
-          typename GridEntityConfig >
-class NeighborGridEntityLayer< GridEntity, 0, GridEntityConfig, false >
-{
-   public:
-      
-      typedef NeighborGridEntityGetter< GridEntity, 0 > NeighborEntityGetterType;
-         
-      __cuda_callable__
-      NeighborGridEntityLayer( const GridEntity& entity ){}
-
-      __cuda_callable__
-      const NeighborEntityGetterType& getNeighborEntities( const DimensionTag< 0 >& tag ) const {}
- 
-      __cuda_callable__
-      void refresh( const typename GridEntity::GridType& grid,
-                    const typename GridEntity::GridType::IndexType& entityIndex ) {}
-};
-
-
 
 
 template< typename GridEntity,
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h b/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h
index cc16fac8acf7968be3f8fd905fea62c0d48bf630..feedcea043c40edb9bb28ab715ba2d4d27af2e82 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Traverser.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 namespace Meshes {
@@ -24,7 +24,7 @@ class Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >
 {
    public:
       using GridType = Meshes::Grid< 1, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -32,18 +32,17 @@ class Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                                    UserData& userData ) const;
+
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                               UserData& userData ) const;
 };
 
 
@@ -55,7 +54,7 @@ class Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >
 {
    public:
       using GridType = Meshes::Grid< 1, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -63,17 +62,17 @@ class Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                                    UserData& userData ) const;
+
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
+                               UserData& userData ) const;
 };
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h
index 7fd7c2e0e22502d52c84c37864a0de16d22b082b..d3d2a129cc3c888981e9a684cc435ba57979f9aa 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h
@@ -29,7 +29,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary cells
@@ -43,7 +43,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
            gridPointer,
            CoordinatesType( 0 ),
            gridPointer->getDimensions() - CoordinatesType( 1 ),
-           userDataPointer );
+           userData );
    }
    else //Distributed
    {
@@ -54,7 +54,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
               gridPointer,
               CoordinatesType( 0 ) + distributedGrid->getLowerOverlap(),
               CoordinatesType( 0 ) + distributedGrid->getLowerOverlap(),
-              userDataPointer );
+              userData );
        }
        
        if( neighbors[ Meshes::DistributedMeshes::Right ] == -1 )
@@ -63,7 +63,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
               gridPointer,
               gridPointer->getDimensions() - CoordinatesType( 1 ) - distributedGrid->getUpperOverlap(),
               gridPointer->getDimensions() - CoordinatesType( 1 ) - distributedGrid->getUpperOverlap(),
-              userDataPointer );
+              userData );
        }
    }
    
@@ -78,7 +78,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior cells
@@ -92,7 +92,7 @@ processInteriorEntities( const GridPointer& gridPointer,
            gridPointer,
            CoordinatesType( 1 ),
            gridPointer->getDimensions() - CoordinatesType( 2 ),
-           userDataPointer );   
+           userData );   
    }
    else //Distributed
    {
@@ -117,7 +117,7 @@ processInteriorEntities( const GridPointer& gridPointer,
           gridPointer,
           begin,
           end,
-          userDataPointer );
+          userData );
    }
    
 }
@@ -132,7 +132,7 @@ void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >::
 processAllEntities(
    const GridPointer& gridPointer,
-   SharedPointer< UserData, Device >& userDataPointer ) const
+   UserData& userData ) const
 {
    /****
     * All cells
@@ -146,7 +146,7 @@ processAllEntities(
            gridPointer,
            CoordinatesType( 0 ),
            gridPointer->getDimensions() - CoordinatesType( 1 ),
-           userDataPointer );  
+           userData );  
    }
    else //Distributed
    {
@@ -157,7 +157,7 @@ processAllEntities(
           gridPointer,
           begin,
           end,
-          userDataPointer );
+          userData );
    }
 
 }
@@ -174,7 +174,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary vertices
@@ -185,7 +185,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0 ),
       gridPointer->getDimensions(),
-      userDataPointer );
+      userData );
 }
 
 template< typename Real,
@@ -197,7 +197,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior vertices
@@ -208,7 +208,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1 ),
       gridPointer->getDimensions() - CoordinatesType( 1 ),
-      userDataPointer );
+      userData );
 }
 
 template< typename Real,
@@ -221,7 +221,7 @@ void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >::
 processAllEntities(
    const GridPointer& gridPointer,
-   SharedPointer< UserData, Device >& userDataPointer ) const
+   UserData& userData ) const
 {
    /****
     * All vertices
@@ -232,7 +232,7 @@ processAllEntities(
       gridPointer,
       CoordinatesType( 0 ),
       gridPointer->getDimensions(),
-      userDataPointer );
+      userData );
 }
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h b/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h
index 5475fe4064a3d50af78062756874abe34998eb04..2e1f1a68d16eded5488d37605020af1d2731c315 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Traverser.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 namespace Meshes {   
@@ -24,7 +24,7 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >
 {
    public:
       using GridType = Meshes::Grid< 2, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -32,17 +32,16 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                               UserData& userData ) const;
 };
 
 template< typename Real,
@@ -53,7 +52,7 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >
 {
    public:
       using GridType = Meshes::Grid< 2, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -61,18 +60,17 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                               UserData& userData ) const;
 };
 
 template< typename Real,
@@ -83,7 +81,7 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >
 {
    public:
       using GridType = Meshes::Grid< 2, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -91,17 +89,17 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
-      
+                                    UserData& userData ) const;
+
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
+                               UserData& userData ) const;
 };
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h
index a41c2e08adb0849da7e9fcfee9cc0ea8a5bf88b6..23d93d7e0b968bfeb53be9fe922855d999d91837 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h
@@ -27,7 +27,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary cells
@@ -41,7 +41,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
        gridPointer,
        CoordinatesType( 0, 0 ),
        gridPointer->getDimensions() - CoordinatesType( 1, 1 ),
-       userDataPointer,
+       userData,
        0 );
    }
    else //Distributed
@@ -56,7 +56,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             begin,
             CoordinatesType( begin.x(), end.y() ),
-            userDataPointer,
+            userData,
             0 );
       }
        
@@ -66,7 +66,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             CoordinatesType( end.x(), begin.y() ),
             end,
-            userDataPointer,
+            userData,
             0 );
       }
        
@@ -77,7 +77,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             begin,
             CoordinatesType( end.x(), begin.y() ),
-            userDataPointer,
+            userData,
             0 );
       }
        
@@ -87,7 +87,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             CoordinatesType( begin.x(), end.y() ),
             end,
-            userDataPointer,
+            userData,
             0 );
       }
    }
@@ -102,7 +102,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior cells
@@ -116,7 +116,7 @@ processInteriorEntities( const GridPointer& gridPointer,
          gridPointer,
          CoordinatesType( 1, 1 ),
          gridPointer->getDimensions() - CoordinatesType( 2, 2 ),
-         userDataPointer,
+         userData,
          0 );
    }
    else // distributed
@@ -141,7 +141,7 @@ processInteriorEntities( const GridPointer& gridPointer,
          gridPointer,
          begin,
          end,
-         userDataPointer,
+         userData,
          0);
    }
 }
@@ -155,7 +155,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All cells
@@ -169,7 +169,7 @@ processAllEntities( const GridPointer& gridPointer,
          gridPointer,
          CoordinatesType( 0, 0 ),
          gridPointer->getDimensions() - CoordinatesType( 1, 1 ),
-         userDataPointer,
+         userData,
          0 );
    }
    else
@@ -182,7 +182,7 @@ processAllEntities( const GridPointer& gridPointer,
           gridPointer,
           begin,
           end,
-          userDataPointer,
+          userData,
           0);   
    }
 }
@@ -199,7 +199,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary faces
@@ -210,7 +210,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 1, 0 ),
       CoordinatesType( 0, 1 ) );
@@ -219,7 +219,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 0, 1 ),
       CoordinatesType( 1, 0 ) );
@@ -234,7 +234,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior faces
@@ -245,7 +245,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 1, 0 ),
       CoordinatesType( 0, 1 ) );
@@ -254,7 +254,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 1 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 0, 1 ),
       CoordinatesType( 1, 0 ) );
@@ -269,7 +269,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All faces
@@ -280,7 +280,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 1, 0 ),
       CoordinatesType( 0, 1 ) );
@@ -289,7 +289,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 0, 1 ),
       CoordinatesType( 1, 0 ) );
@@ -304,7 +304,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary vertices
@@ -315,7 +315,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0 ),
       gridPointer->getDimensions(),
-      userDataPointer,
+      userData,
       0 );
 }
 
@@ -328,7 +328,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior vertices
@@ -339,7 +339,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1, 1 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1 ),
-      userDataPointer,
+      userData,
       0 );
 }
  
@@ -352,7 +352,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All vertices
@@ -363,7 +363,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0 ),
       gridPointer->getDimensions(),
-      userDataPointer,
+      userData,
       0 );
 }
 
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h b/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h
index bed160edfaf2e98f03964c65d041f16a9a0324fc..d0eaa25767abb0a3e9d655245f2e94ff6a52918a 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Traverser.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
 namespace Meshes {
@@ -24,7 +24,7 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >
 {
    public:
       using GridType = Meshes::Grid< 3, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -32,17 +32,16 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                               UserData& userData ) const;
 };
 
 template< typename Real,
@@ -53,7 +52,7 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >
 {
    public:
       using GridType = Meshes::Grid< 3, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -61,17 +60,16 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                               UserData& userData ) const;
 };
 
 template< typename Real,
@@ -82,7 +80,7 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >
 {
    public:
       using GridType = Meshes::Grid< 3, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -90,18 +88,17 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                               UserData& userData ) const;
 };
 
 template< typename Real,
@@ -112,7 +109,7 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >
 {
    public:
       using GridType = Meshes::Grid< 3, Real, Device, Index >;
-      using GridPointer = SharedPointer< GridType >;
+      using GridPointer = Pointers::SharedPointer< GridType >;
       using CoordinatesType = typename GridType::CoordinatesType;
       using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh< GridType >;
       using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
@@ -120,17 +117,17 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, Device >& userDataPointer ) const;
- 
+                                    UserData& userData ) const;
+
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, Device >& userDataPointer ) const;
+                               UserData& userData ) const;
 };
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h
index 06cba35b5237849383598c57712c227515fcaaaa..e207d324d94893a4478caacef28f641a8df89daf 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h
@@ -29,7 +29,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary cells
@@ -43,7 +43,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
           gridPointer,
           CoordinatesType( 0, 0, 0 ),
           gridPointer->getDimensions() - CoordinatesType( 1, 1, 1 ),
-          userDataPointer,
+          userData,
           0 );
    }
    else // distributed
@@ -58,7 +58,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             begin,
-            CoordinatesType( begin.x(), end.x(), end.y() ),
-            userDataPointer,
+            CoordinatesType( begin.x(), end.y(), end.z() ),
+            userData,
             0 );
       }
        
@@ -68,7 +68,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             CoordinatesType( end.x() , begin.y(), begin.z() ),
             end,
-            userDataPointer,
+            userData,
             0 );
        }
        
@@ -78,7 +78,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             begin,
             CoordinatesType( end.x(), begin.y(), end.z() ),
-            userDataPointer,
+            userData,
             0 );
       }
        
@@ -88,7 +88,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             CoordinatesType( begin.x(), end.y(), begin.z() ),
             end,
-            userDataPointer,
+            userData,
             0 );
        }
        
@@ -98,7 +98,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             begin,
             CoordinatesType( end.x(), end.y(), begin.z() ),
-            userDataPointer,
+            userData,
             0 );
       }
       
@@ -108,7 +108,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
             gridPointer,
             CoordinatesType( begin.x(), begin.y(), end.z() ),
             end,
-            userDataPointer,
+            userData,
             0 );
       } 
    }
@@ -123,7 +123,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior cells
@@ -137,7 +137,7 @@ processInteriorEntities( const GridPointer& gridPointer,
          gridPointer,
          CoordinatesType( 1, 1, 1 ),
          gridPointer->getDimensions() - CoordinatesType( 2, 2, 2 ),
-         userDataPointer,
+         userData,
          0 );
    }
    else
@@ -168,7 +168,7 @@ processInteriorEntities( const GridPointer& gridPointer,
          gridPointer,
          begin,
          end,
-         userDataPointer,
+         userData,
          0);      
    }
 }
@@ -182,7 +182,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All cells
@@ -196,7 +196,7 @@ processAllEntities( const GridPointer& gridPointer,
          gridPointer,
          CoordinatesType( 0, 0, 0 ),
          gridPointer->getDimensions() - CoordinatesType( 1, 1, 1 ),
-         userDataPointer,
+         userData,
          0 );
    }
    else
@@ -208,7 +208,7 @@ processAllEntities( const GridPointer& gridPointer,
          gridPointer,
          begin,
          end,
-         userDataPointer,
+         userData,
          0 ); 
    }
 }
@@ -225,7 +225,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary faces
@@ -236,7 +236,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1, 1 ),
-      userDataPointer,
+      userData,
       2,
       CoordinatesType( 1, 0, 0 ),
       CoordinatesType( 0, 1, 1 ) );
@@ -245,7 +245,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 0, 1, 0 ),
       CoordinatesType( 1, 0, 1 ) );
@@ -254,7 +254,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 0 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 0, 0, 1 ),
       CoordinatesType( 1, 1, 0 ) );
@@ -269,7 +269,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior faces
@@ -280,7 +280,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 1 ),
-      userDataPointer,
+      userData,
       2,
       CoordinatesType( 1, 0, 0 ),
       CoordinatesType( 0, 1, 1 ) );
@@ -289,7 +289,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 1, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 0, 1, 0 ),
       CoordinatesType( 1, 0, 1 ) );
@@ -298,7 +298,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 1 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 1 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 0, 0, 1 ),
       CoordinatesType( 1, 1, 0 ) );
@@ -313,7 +313,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All faces
@@ -323,7 +323,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1, 1 ),
-      userDataPointer,
+      userData,
       2,
       CoordinatesType( 1, 0, 0 ),
       CoordinatesType( 0, 1, 1 ) );
@@ -332,7 +332,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 0, 1, 0 ),
       CoordinatesType( 1, 0, 1 ) );
@@ -341,7 +341,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 0 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 0, 0, 1 ),
       CoordinatesType( 1, 1, 0 ) );
@@ -359,7 +359,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary edges
@@ -370,7 +370,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0, 0 ),
-      userDataPointer,
+      userData,
       2,
       CoordinatesType( 0, 1, 1 ),
       CoordinatesType( 1, 0, 0 ) );
@@ -379,7 +379,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1, 0 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 1, 0, 1 ),
       CoordinatesType( 0, 1, 0 ) );
@@ -388,7 +388,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 0, 1 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 1, 1, 0 ),
       CoordinatesType( 0, 0, 1 ) );
@@ -403,7 +403,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior edges
@@ -414,7 +414,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 1, 1 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1, 1 ),
-      userDataPointer,
+      userData,
       2,
       CoordinatesType( 0, 1, 1 ),
       CoordinatesType( 1, 0, 0 ) );
@@ -423,7 +423,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1, 0, 1 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0, 1 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 1, 0, 1 ),
       CoordinatesType( 0, 1, 0 ) );
@@ -432,7 +432,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1, 1, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 0 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 1, 1, 0 ),
       CoordinatesType( 0, 0, 1 ) );
@@ -447,7 +447,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All edges
@@ -457,7 +457,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 0, 0 ),
-      userDataPointer,
+      userData,
       2,
       CoordinatesType( 0, 1, 1 ),      
       CoordinatesType( 1, 0, 0 ) );
@@ -466,7 +466,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 1, 0 ),
-      userDataPointer,
+      userData,
       1,
       CoordinatesType( 1, 0, 1 ),      
       CoordinatesType( 0, 1, 0 ) );
@@ -475,7 +475,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions() - CoordinatesType( 0, 0, 1 ),
-      userDataPointer,
+      userData,
       0,
       CoordinatesType( 1, 1, 0 ),      
       CoordinatesType( 0, 0, 1 ) );
@@ -493,7 +493,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Boundary vertices
@@ -504,7 +504,7 @@ processBoundaryEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions(),
-      userDataPointer,
+      userData,
       0 );
 }
 
@@ -517,7 +517,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, Device >& userDataPointer ) const
+                         UserData& userData ) const
 {
    /****
     * Interior vertices
@@ -528,7 +528,7 @@ processInteriorEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 1, 1, 1 ),
       gridPointer->getDimensions() - CoordinatesType( 1, 1, 1 ),
-      userDataPointer,
+      userData,
       0 );
 }
  
@@ -541,7 +541,7 @@ template< typename Real,
 void
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, Device >& userDataPointer ) const
+                    UserData& userData ) const
 {
    /****
     * All vertices
@@ -552,7 +552,7 @@ processAllEntities( const GridPointer& gridPointer,
       gridPointer,
       CoordinatesType( 0, 0, 0 ),
       gridPointer->getDimensions(),
-      userDataPointer,
+      userData,
       0 );
 }
 
diff --git a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h
index c5c6516cc8936c953a71c265bc024a323a41afb7..7dff9d43e7f4a0c621efd26f8582dfaab541df01 100644
--- a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h
+++ b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h
@@ -27,7 +27,7 @@
 
 #include <TNL/Meshes/DimensionTag.h>
 #include <TNL/Meshes/Mesh.h>
-#include <TNL/DevicePointer.h>
+#include <TNL/Pointers/DevicePointer.h>
 #include <TNL/ParallelFor.h>
 #include <TNL/StaticFor.h>
 
@@ -57,8 +57,8 @@ public:
          const IndexType entitiesCount = mesh.template getEntitiesCount< DimensionTag::value >();
          auto& superentitiesStorage = mesh.template getSuperentityStorageNetwork< DimensionTag::value, SuperdimensionTag::value >();
          using Multimap = typename std::remove_reference< decltype(superentitiesStorage) >::type;
-         DevicePointer< Mesh > meshPointer( mesh );
-         DevicePointer< Multimap > superentitiesStoragePointer( superentitiesStorage );
+         Pointers::DevicePointer< Mesh > meshPointer( mesh );
+         Pointers::DevicePointer< Multimap > superentitiesStoragePointer( superentitiesStorage );
 
          auto kernel = [] __cuda_callable__
             ( IndexType i,
@@ -97,8 +97,8 @@ public:
          const IndexType entitiesCount = mesh.template getEntitiesCount< SuperdimensionTag::value >();
          auto& subentitiesStorage = mesh.template getSubentityStorageNetwork< SuperdimensionTag::value, DimensionTag::value >();
          using Multimap = typename std::remove_reference< decltype(subentitiesStorage) >::type;
-         DevicePointer< Mesh > meshPointer( mesh );
-         DevicePointer< Multimap > subentitiesStoragePointer( subentitiesStorage );
+         Pointers::DevicePointer< Mesh > meshPointer( mesh );
+         Pointers::DevicePointer< Multimap > subentitiesStoragePointer( subentitiesStorage );
 
          auto kernel = [] __cuda_callable__
             ( IndexType i,
diff --git a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h
index 2bf9a007c6e76b532718a11174f8525c9fec0b25..0be4f06bf1ff7d67a21f4e570f60cb64b818bb03 100644
--- a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h
+++ b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h
@@ -138,7 +138,7 @@ public:
       auto kernel1 = [] __cuda_callable__
          ( IndexType i,
            const Mesh* mesh,
-           typename StorageArrayType::ElementType* entitiesArray,
+           typename StorageArrayType::ValueType* entitiesArray,
            const IndexType* perm )
       {
          entitiesArray[ i ] = mesh->template getEntity< Dimension >( perm[ i ] );
@@ -148,14 +148,14 @@ public:
       auto kernel2 = [] __cuda_callable__
          ( IndexType i,
            Mesh* mesh,
-           const typename StorageArrayType::ElementType* entitiesArray )
+           const typename StorageArrayType::ValueType* entitiesArray )
       {
          auto& entity = mesh->template getEntity< Dimension >( i );
          entity = entitiesArray[ i ];
          entity.setIndex( i );
       };
 
-      DevicePointer< Mesh > meshPointer( mesh );
+      Pointers::DevicePointer< Mesh > meshPointer( mesh );
       ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount,
                                        kernel1,
                                        &meshPointer.template getData< DeviceType >(),
diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h
index a475ddcb4396bc4fadd5a50388379a0f40f6068b..918f4e025529dfde0117fbe2886d413e8bebde22 100644
--- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h
+++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h
@@ -12,7 +12,7 @@
 
 #include <TNL/StaticFor.h>
 #include <TNL/ParallelFor.h>
-#include <TNL/DevicePointer.h>
+#include <TNL/Pointers/DevicePointer.h>
 #include <TNL/Meshes/DimensionTag.h>
 #include <TNL/Meshes/MeshDetails/traits/MeshEntityTraits.h>
 
@@ -141,7 +141,7 @@ public:
          };
 
          const GlobalIndexType facesCount = mesh.template getEntitiesCount< Mesh::getMeshDimension() - 1 >();
-         DevicePointer< Mesh > meshPointer( mesh );
+         Pointers::DevicePointer< Mesh > meshPointer( mesh );
          ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount,
                                           kernel,
                                           &meshPointer.template modifyData< DeviceType >() );
diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h
index 06401f1a351bc087922799f31472337cfac0d988..f743fd25e435ef32b8aa50a550a229772d08d89e 100644
--- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h
+++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h
@@ -97,7 +97,7 @@ public:
    void updateBoundaryIndices( DimensionTag )
    {
       // Array does not have sum(), Vector of bools does not fit due to arithmetics
-      Containers::Vector< typename BoundaryTagsArray::ElementType, typename BoundaryTagsArray::DeviceType, typename BoundaryTagsArray::IndexType > _boundaryTagsVector;
+      Containers::Vector< typename BoundaryTagsArray::ValueType, typename BoundaryTagsArray::DeviceType, typename BoundaryTagsArray::IndexType > _boundaryTagsVector;
       _boundaryTagsVector.bind( boundaryTags.getData(), boundaryTags.getSize() );
       const GlobalIndexType boundaryEntities = _boundaryTagsVector.template sum< GlobalIndexType >();
       boundaryIndices.setSize( boundaryEntities );
diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h
index 12b24868b0f6941b5844dfdeb9de919be5f514a0..5dedf58fd2ceea521e51ece53e42d0efd65caec1 100644
--- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h
+++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h
@@ -15,7 +15,7 @@
 #include <TNL/Exceptions/CudaSupportMissing.h>
 
 namespace TNL {
-namespace Meshes {   
+namespace Meshes {
 
 template< typename Mesh,
           typename MeshEntity,
@@ -25,7 +25,7 @@ template< typename Mesh,
 void
 Traverser< Mesh, MeshEntity, EntitiesDimension >::
 processBoundaryEntities( const MeshPointer& meshPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         UserData& userData ) const
 {
    auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >();
 #ifdef HAVE_OPENMP
@@ -35,7 +35,7 @@ processBoundaryEntities( const MeshPointer& meshPointer,
       const auto entityIndex = meshPointer->template getBoundaryEntityIndex< EntitiesDimension >( i );
       auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *meshPointer, *userDataPointer, entity );
+      EntitiesProcessor::processEntity( *meshPointer, userData, entity );
    }
 }
 
@@ -47,7 +47,7 @@ template< typename Mesh,
 void
 Traverser< Mesh, MeshEntity, EntitiesDimension >::
 processInteriorEntities( const MeshPointer& meshPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         UserData& userData ) const
 {
    auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >();
 #ifdef HAVE_OPENMP
@@ -57,7 +57,7 @@ processInteriorEntities( const MeshPointer& meshPointer,
       const auto entityIndex = meshPointer->template getInteriorEntityIndex< EntitiesDimension >( i );
       auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *meshPointer, *userDataPointer, entity );
+      EntitiesProcessor::processEntity( *meshPointer, userData, entity );
    }
 }
 
@@ -69,7 +69,7 @@ template< typename Mesh,
 void
 Traverser< Mesh, MeshEntity, EntitiesDimension >::
 processAllEntities( const MeshPointer& meshPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    UserData& userData ) const
 {
    auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >();
 #ifdef HAVE_OPENMP
@@ -78,7 +78,7 @@ processAllEntities( const MeshPointer& meshPointer,
    for( decltype(entitiesCount) entityIndex = 0; entityIndex < entitiesCount; entityIndex++ ) {
       auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *meshPointer, *userDataPointer, entity );
+      EntitiesProcessor::processEntity( *meshPointer, userData, entity );
    }
 }
 
@@ -90,7 +90,7 @@ template< int EntitiesDimension,
           typename UserData >
 __global__ void
 MeshTraverserBoundaryEntitiesKernel( const Mesh* mesh,
-                                     UserData* userData,
+                                     UserData userData,
                                      typename Mesh::GlobalIndexType entitiesCount )
 {
    for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -100,7 +100,7 @@ MeshTraverserBoundaryEntitiesKernel( const Mesh* mesh,
       const auto entityIndex = mesh->template getBoundaryEntityIndex< EntitiesDimension >( i );
       auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *mesh, *userData, entity );
+      EntitiesProcessor::processEntity( *mesh, userData, entity );
    }
 }
 
@@ -110,7 +110,7 @@ template< int EntitiesDimension,
           typename UserData >
 __global__ void
 MeshTraverserInteriorEntitiesKernel( const Mesh* mesh,
-                                     UserData* userData,
+                                     UserData userData,
                                      typename Mesh::GlobalIndexType entitiesCount )
 {
    for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -120,7 +120,7 @@ MeshTraverserInteriorEntitiesKernel( const Mesh* mesh,
       const auto entityIndex = mesh->template getInteriorEntityIndex< EntitiesDimension >( i );
       auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *mesh, *userData, entity );
+      EntitiesProcessor::processEntity( *mesh, userData, entity );
    }
 }
 
@@ -130,7 +130,7 @@ template< int EntitiesDimension,
           typename UserData >
 __global__ void
 MeshTraverserAllEntitiesKernel( const Mesh* mesh,
-                                UserData* userData,
+                                UserData userData,
                                 typename Mesh::GlobalIndexType entitiesCount )
 {
    for( typename Mesh::GlobalIndexType entityIndex = blockIdx.x * blockDim.x + threadIdx.x;
@@ -139,7 +139,7 @@ MeshTraverserAllEntitiesKernel( const Mesh* mesh,
    {
       auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex );
       // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex
-      EntitiesProcessor::processEntity( *mesh, *userData, entity );
+      EntitiesProcessor::processEntity( *mesh, userData, entity );
    }
 }
 #endif
@@ -152,7 +152,7 @@ template< typename MeshConfig,
 void
 Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >::
 processBoundaryEntities( const MeshPointer& meshPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         UserData& userData ) const
 {
 #ifdef HAVE_CUDA
    auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >();
@@ -166,7 +166,7 @@ processBoundaryEntities( const MeshPointer& meshPointer,
    MeshTraverserBoundaryEntitiesKernel< EntitiesDimension, EntitiesProcessor >
       <<< gridSize, blockSize >>>
       ( &meshPointer.template getData< Devices::Cuda >(),
-        &userDataPointer.template modifyData< Devices::Cuda >(),
+        userData,
         entitiesCount );
    cudaDeviceSynchronize();
    TNL_CHECK_CUDA_DEVICE;
@@ -183,7 +183,7 @@ template< typename MeshConfig,
 void
 Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >::
 processInteriorEntities( const MeshPointer& meshPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         UserData& userData ) const
 {
 #ifdef HAVE_CUDA
    auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >();
@@ -197,7 +197,7 @@ processInteriorEntities( const MeshPointer& meshPointer,
    MeshTraverserInteriorEntitiesKernel< EntitiesDimension, EntitiesProcessor >
       <<< gridSize, blockSize >>>
       ( &meshPointer.template getData< Devices::Cuda >(),
-        &userDataPointer.template modifyData< Devices::Cuda >(),
+        userData,
         entitiesCount );
    cudaDeviceSynchronize();
    TNL_CHECK_CUDA_DEVICE;
@@ -214,7 +214,7 @@ template< typename MeshConfig,
 void
 Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >::
 processAllEntities( const MeshPointer& meshPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    UserData& userData ) const
 {
 #ifdef HAVE_CUDA
    auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >();
@@ -228,7 +228,7 @@ processAllEntities( const MeshPointer& meshPointer,
    MeshTraverserAllEntitiesKernel< EntitiesDimension, EntitiesProcessor >
       <<< gridSize, blockSize >>>
       ( &meshPointer.template getData< Devices::Cuda >(),
-        &userDataPointer.template modifyData< Devices::Cuda >(),
+        userData,
         entitiesCount );
    cudaDeviceSynchronize();
    TNL_CHECK_CUDA_DEVICE;
diff --git a/src/TNL/Meshes/Traverser.h b/src/TNL/Meshes/Traverser.h
index ce0e0bf99d818b696c94ed8791dad5717dcb3cde..017084ae8d39f03927b5a038a62776abe7a1f588 100644
--- a/src/TNL/Meshes/Traverser.h
+++ b/src/TNL/Meshes/Traverser.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Meshes/Mesh.h>
 
 namespace TNL {
@@ -23,23 +23,23 @@ class Traverser
 {
    public:
       using MeshType = Mesh;
-      using MeshPointer = SharedPointer< MeshType >;
+      using MeshPointer = Pointers::SharedPointer<  MeshType >;
       using DeviceType = typename MeshType::DeviceType;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const MeshPointer& meshPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const MeshPointer& meshPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const MeshPointer& meshPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               UserData& userData ) const;
 };
 
 template< typename MeshConfig,
@@ -49,23 +49,23 @@ class Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimensio
 {
    public:
       using MeshType = Mesh< MeshConfig, Devices::Cuda >;
-      using MeshPointer = SharedPointer< MeshType >;
+      using MeshPointer = Pointers::SharedPointer<  MeshType >;
       using DeviceType = typename MeshType::DeviceType;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const MeshPointer& meshPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                       UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const MeshPointer& meshPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    UserData& userData ) const;
 
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const MeshPointer& meshPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               UserData& userData ) const;
 };
 
 } // namespace Meshes
diff --git a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h
index ed908244a965a5554664d85fbcdf51f412be741e..d5d85e75d84861999476c5d02a7d6ad723cd3954 100644
--- a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h
+++ b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h
@@ -155,6 +155,25 @@ loadMesh( const String& fileName,
    return true;
 }
 
+// Generic (non-grid) mesh: decomposition is not implemented, so a distributed
+// communicator is reported on stderr and rejected; otherwise nothing is done.
+template< typename Problem, typename MeshConfig, typename Device >
+bool
+decomposeMesh( const Config::ParameterContainer& parameters,
+               const String& prefix,
+               Mesh< MeshConfig, Device >& mesh,
+               DistributedMeshes::DistributedMesh< Mesh< MeshConfig, Device > >& distributedMesh,
+               Problem& problem )
+{
+   using CommunicatorType = typename Problem::CommunicatorType;
+   if( CommunicatorType::isDistributed() )
+   {
+      std::cerr << "Distributed Mesh is not supported yet, only Distributed Grid is supported." << std::endl;
+      return false;
+   }
+   return true;
+}
+
 template< typename CommunicatorType,
           typename MeshConfig >
 bool
@@ -175,24 +194,6 @@ loadMesh( const String& fileName,
    return true;
 }
 
-template< typename CommunicatorType,
-          typename MeshConfig,
-          typename Problem >
-bool
-decomposeMesh( const Config::ParameterContainer& parameters,
-               const String& prefix,
-               Mesh< MeshConfig, Devices::Cuda >& mesh,
-               DistributedMeshes::DistributedMesh< Mesh< MeshConfig, Devices::Cuda > >& distributedMesh,
-               Problem& problem )
-{
-   if( CommunicatorType::isDistributed() )
-   {
-       std::cerr << "Distributed Mesh is not supported yet, only Distributed Grid is supported.";
-       return false;
-   }
-   return true;
-}
-
 // Specializations for grids
 template< typename CommunicatorType,
           int Dimension,
@@ -253,7 +254,7 @@ decomposeMesh( const Config::ParameterContainer& parameters,
    using DistributedGridType = DistributedMeshes::DistributedMesh< GridType >;
    using SubdomainOverlapsType = typename DistributedGridType::SubdomainOverlapsType;
    using CommunicatorType = typename Problem::CommunicatorType;
-   
+
    if( CommunicatorType::isDistributed() )
    {
       SubdomainOverlapsType lower, upper;
@@ -266,6 +267,5 @@ decomposeMesh( const Config::ParameterContainer& parameters,
       return true;
 }
 
-
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Operators/Advection/LaxFridrichs.h b/src/TNL/Operators/Advection/LaxFridrichs.h
index 5bd51aa72efcaf9b9203f3f54894c9479282ce5f..d1fbd399e52737404063f6c016b19ac3743c8587 100644
--- a/src/TNL/Operators/Advection/LaxFridrichs.h
+++ b/src/TNL/Operators/Advection/LaxFridrichs.h
@@ -4,7 +4,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
    namespace Operators {
@@ -30,7 +30,7 @@ class LaxFridrichs< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index,
    public:
       
       typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       static const int Dimension = MeshType::getMeshDimension();
       typedef typename MeshType::CoordinatesType CoordinatesType;
       typedef Real RealType;
@@ -39,7 +39,7 @@ class LaxFridrichs< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index,
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef VelocityFunction VelocityFunctionType;
       typedef Functions::VectorField< Dimension, VelocityFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
@@ -122,7 +122,7 @@ class LaxFridrichs< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index,
    public:
       
       typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       static const int Dimension = MeshType::getMeshDimension();
       typedef typename MeshType::CoordinatesType CoordinatesType;
       typedef Real RealType;
@@ -131,7 +131,7 @@ class LaxFridrichs< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index,
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef VelocityFunction VelocityFunctionType;
       typedef Functions::VectorField< Dimension, VelocityFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
@@ -220,7 +220,7 @@ class LaxFridrichs< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index,
    public:
       
       typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       static const int Dimension = MeshType::getMeshDimension();
       typedef typename MeshType::CoordinatesType CoordinatesType;
       typedef Real RealType;
@@ -229,7 +229,7 @@ class LaxFridrichs< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index,
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef VelocityFunction VelocityFunctionType;
       typedef Functions::VectorField< Dimension, VelocityFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer<  VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/src/TNL/Operators/Advection/Upwind.h b/src/TNL/Operators/Advection/Upwind.h
index a5e19ac10933f92f3d7d3be52985f28c587636ec..e41768e571082b9e7be7b547d915b4bf1e91340f 100644
--- a/src/TNL/Operators/Advection/Upwind.h
+++ b/src/TNL/Operators/Advection/Upwind.h
@@ -4,7 +4,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/VectorField.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 namespace TNL {
    namespace Operators {
@@ -30,7 +30,7 @@ class Upwind< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, Veloc
    public:
       
       typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef typename MeshType::CoordinatesType CoordinatesType;
       typedef Real RealType;
@@ -39,7 +39,7 @@ class Upwind< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, Veloc
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef VelocityFunction VelocityFunctionType;
       typedef Functions::VectorField< Dimensions, VelocityFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
@@ -123,7 +123,7 @@ class Upwind< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, Veloc
    public:
       
       typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef typename MeshType::CoordinatesType CoordinatesType;
       typedef Real RealType;
@@ -132,7 +132,7 @@ class Upwind< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, Veloc
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef VelocityFunction VelocityFunctionType;
       typedef Functions::VectorField< Dimensions, VelocityFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
@@ -225,7 +225,7 @@ class Upwind< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, Veloc
    public:
       
       typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer< MeshType > MeshPointer;
       static const int Dimensions = MeshType::getMeshDimension();
       typedef typename MeshType::CoordinatesType CoordinatesType;
       typedef Real RealType;
@@ -234,7 +234,7 @@ class Upwind< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, Veloc
       typedef Functions::MeshFunction< MeshType > MeshFunctionType;
       typedef VelocityFunction VelocityFunctionType;
       typedef Functions::VectorField< Dimensions, VelocityFunctionType > VelocityFieldType;
-      typedef SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
+      typedef Pointers::SharedPointer< VelocityFieldType, DeviceType > VelocityFieldPointer;
       
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/src/TNL/Operators/DirichletBoundaryConditions.h b/src/TNL/Operators/DirichletBoundaryConditions.h
index 759db071cf8af223abedc6899e1787f45ab76ee3..31389407261bf598bee4de13fe58639e3ba33fda 100644
--- a/src/TNL/Operators/DirichletBoundaryConditions.h
+++ b/src/TNL/Operators/DirichletBoundaryConditions.h
@@ -39,7 +39,7 @@ class DirichletBoundaryConditions
       typedef typename MeshType::DeviceType DeviceType;
       typedef Index IndexType;
       
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer<  Mesh > MeshPointer;
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::PointType PointType;
 
diff --git a/src/TNL/Operators/OperatorComposition.h b/src/TNL/Operators/OperatorComposition.h
index e4730c2429c9f98b6f331bd7564fc78c226708d8..04ef38951ce02f9d645610656a487f4ae2bac1d5 100644
--- a/src/TNL/Operators/OperatorComposition.h
+++ b/src/TNL/Operators/OperatorComposition.h
@@ -50,7 +50,7 @@ class OperatorComposition
       typedef typename InnerOperator::IndexType IndexType;
       typedef ExactOperatorComposition< typename OuterOperator::ExactOperatorType,
                                            typename InnerOperator::ExactOperatorType > ExactOperatorType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       
       static constexpr int getPreimageEntitiesDimension() { return InnerOperator::getImageEntitiesDimension(); };
       static constexpr int getImageEntitiesDimension() { return OuterOperator::getImageEntitiesDimension(); };
@@ -131,7 +131,7 @@ class OperatorComposition< OuterOperator, InnerOperator, void >
       typedef Functions::OperatorFunction< InnerOperator, ImageFunctionType > OuterOperatorFunction;
       typedef typename InnerOperator::RealType RealType;
       typedef typename InnerOperator::IndexType IndexType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       
       OperatorComposition( const OuterOperator& outerOperator,
                               InnerOperator& innerOperator,
diff --git a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
index 91f9848cbc9bc63faca5538913f1ec4c9a1701b0..6e99d2f053bd40774356518ceac3e4428738eb0c 100644
--- a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
+++ b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
@@ -38,7 +38,7 @@ class OneSidedMeanCurvature
    public:
  
       typedef Mesh MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       typedef Real RealType;
       typedef Index IndexType;
       typedef FDMGradientNorm< MeshType, ForwardFiniteDifference, RealType, IndexType > GradientNorm;
diff --git a/src/TNL/Operators/geometric/CoFVMGradientNorm.h b/src/TNL/Operators/geometric/CoFVMGradientNorm.h
index 0380ecc223f8b1556cf379bfb2dd64a1197ae576..2af779a11fdb2664bdfb636425dc8218e09199c7 100644
--- a/src/TNL/Operators/geometric/CoFVMGradientNorm.h
+++ b/src/TNL/Operators/geometric/CoFVMGradientNorm.h
@@ -50,7 +50,7 @@ class CoFVMGradientNorm< Meshes::Grid< MeshDimension, MeshReal, Device, MeshInde
       typedef MeshEntitiesInterpolants< MeshType, MeshDimension - 1, MeshDimension > OuterOperator;
       typedef OperatorComposition< OuterOperator, InnerOperator > BaseType;
       typedef ExactGradientNorm< MeshDimension, RealType > ExactOperatorType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
          
       CoFVMGradientNorm( const OuterOperator& outerOperator,
                             InnerOperator& innerOperator,
diff --git a/src/TNL/Pointers/CMakeLists.txt b/src/TNL/Pointers/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6c69336cd29044deb7909e0dcc12291ec6d1f56f
--- /dev/null
+++ b/src/TNL/Pointers/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Public headers of the Pointers module; installed by the last command.
+set( headers
+     DevicePointer.h
+     SharedPointer.h
+     SharedPointerCuda.h
+     SharedPointerHost.h
+     SharedPointerMic.h
+     SmartPointer.h
+     SmartPointersRegister.h
+     UniquePointer.h )
+
+# Absolute paths are used for the sources; presumably so that the lists
+# stay valid in the parent scope they are exported to below.
+set( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Pointers )
+set( common_SOURCES ${CURRENT_DIR}/SmartPointersRegister.cpp )
+
+# Export the source list to the parent directory's scope.
+set( tnl_pointers_SOURCES ${common_SOURCES} PARENT_SCOPE )
+
+# The same sources are exported once more when BUILD_CUDA is set.
+if( BUILD_CUDA )
+   set( tnl_pointers_CUDA__SOURCES ${common_SOURCES} PARENT_SCOPE )
+endif()
+
+install( FILES ${headers}
+         DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Pointers )
diff --git a/src/TNL/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h
similarity index 98%
rename from src/TNL/DevicePointer.h
rename to src/TNL/Pointers/DevicePointer.h
index 956973529cbc39ad4e9bc42366ae118298b03ca2..194e68967ccc3368983ce32aeca22f3af1f4e2be 100644
--- a/src/TNL/DevicePointer.h
+++ b/src/TNL/Pointers/DevicePointer.h
@@ -14,14 +14,13 @@
 
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/SmartPointer.h>
-
-#include <cstring>
-
-#include "Devices/MIC.h"
+#include <TNL/Devices/MIC.h>
+#include <TNL/Pointers/SmartPointer.h>
 
+#include <cstring>  // std::memcpy, std::memcmp
 
 namespace TNL {
+namespace Pointers {
 
 /***
  * The DevicePointer is like SharedPointer, except it takes an existing host
@@ -705,7 +704,7 @@ class DevicePointer< Object, Devices::MIC > : public SmartPointer
          if( ! this->mic_pointer )
             return false;
          Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*)this->pointer,sizeof(ObjectType));
-                 
+
          // set last-sync state
          this->set_last_sync_state();
          Devices::MIC::insertSmartPointer( this );
@@ -754,15 +753,16 @@ class DevicePointer< Object, Devices::MIC > : public SmartPointer
 };
 #endif
 
+} // namespace Pointers
 
-#if  (!defined(NDEBUG)) && (!defined(HAVE_MIC)) 
+#if (!defined(NDEBUG)) && (!defined(HAVE_MIC))
 namespace Assert {
 
 template< typename Object, typename Device >
-struct Formatter< DevicePointer< Object, Device > >
+struct Formatter< Pointers::DevicePointer< Object, Device > >
 {
    static std::string
-   printToString( const DevicePointer< Object, Device >& value )
+   printToString( const Pointers::DevicePointer< Object, Device >& value )
    {
       ::std::stringstream ss;
       ss << "(DevicePointer< " << Object::getType() << ", " << Device::getDeviceType()
diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h
new file mode 100644
index 0000000000000000000000000000000000000000..a82685f2a12e6a9f016682cbc4099941d65b4942
--- /dev/null
+++ b/src/TNL/Pointers/SharedPointer.h
@@ -0,0 +1,74 @@
+/***************************************************************************
+                          SharedPointer.h  -  description
+                             -------------------
+    begin                : May 6, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber, Jakub Klinkovsky
+
+#pragma once
+
+#include <cstring>
+#include <type_traits>  // std::is_same
+#include <TNL/Assert.h>
+//#define TNL_DEBUG_SHARED_POINTERS
+
+#ifdef TNL_DEBUG_SHARED_POINTERS
+   #include <typeinfo>
+   #include <cxxabi.h>
+   #include <iostream>
+   #include <string>
+   #include <memory>
+   #include <cstdlib>
+
+   inline  // Turns a mangled C++ symbol name into readable form; compiled only with TNL_DEBUG_SHARED_POINTERS.
+   std::string demangle(const char* mangled)
+   {
+      int status;  // written by __cxa_demangle but never inspected; the null check below decides success
+      std::unique_ptr<char[], void (*)(void*)> result(
+         abi::__cxa_demangle(mangled, 0, 0, &status), std::free);  // returned buffer is released via std::free
+      return result.get() ? std::string(result.get()) : "error occurred";  // fallback text when no name was produced
+   }
+#endif
+
+
+namespace TNL {
+namespace Pointers {
+
+// Primary template: it only rejects Device = void; the device-specific variants are expected in SharedPointerHost/Cuda/Mic.h, included at the end of this file.
+template< typename Object, typename Device = typename Object::DeviceType >
+class SharedPointer
+{
+   static_assert( ! std::is_same< Device, void >::value, "The device cannot be void. You need to specify the device explicitly in your code." );
+};
+
+} // namespace Pointers
+
+#if (!defined(NDEBUG)) && (!defined(HAVE_MIC))
+namespace Assert {
+
+template< typename Object, typename Device >
+struct Formatter< Pointers::SharedPointer< Object, Device > >  // Assert::Formatter specialization for SharedPointer values
+{
+   static std::string
+   printToString( const Pointers::SharedPointer< Object, Device >& value )  // returns a short textual description of `value`
+   {
+      ::std::stringstream ss;
+      ss << "(SharedPointer< " << Object::getType() << ", " << Device::getDeviceType()  // type names come from the static getType()/getDeviceType()
+         << " > object at " << &value << ")";  // prints the address of the pointer object itself, not of the pointee
+      return ss.str();
+   }
+};
+
+} // namespace Assert
+#endif
+
+} // namespace TNL
+
+#include <TNL/Pointers/SharedPointerHost.h>
+#include <TNL/Pointers/SharedPointerCuda.h>
+#include <TNL/Pointers/SharedPointerMic.h>
diff --git a/src/TNL/SharedPointer.h b/src/TNL/Pointers/SharedPointerCuda.h
similarity index 55%
rename from src/TNL/SharedPointer.h
rename to src/TNL/Pointers/SharedPointerCuda.h
index 2030e4898c196282b1310352405fc1a35218e69b..810d85e99125bea191cd112e88771b8ef2488322 100644
--- a/src/TNL/SharedPointer.h
+++ b/src/TNL/Pointers/SharedPointerCuda.h
@@ -1,8 +1,8 @@
 /***************************************************************************
-                          SharedPointer.h  -  description
+                          SharedPointerCuda.h  -  description
                              -------------------
-    begin                : May 6, 2016
-    copyright            : (C) 2016 by Tomas Oberhuber et al.
+    begin                : Aug 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -12,14 +12,14 @@
 
 #pragma once
 
-#include <TNL/Devices/Host.h>
-#include <TNL/Devices/Cuda.h>
-#include <TNL/Devices/MIC.h>
-#include <TNL/SmartPointer.h>
-
-#include <cstring>
+#include "SharedPointer.h"
 
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SmartPointer.h>
 
+#include <cstring>   // std::memcpy, std::memcmp
+#include <cstddef>   // std::nullptr_t
+#include <algorithm> // swap
 
 //#define TNL_DEBUG_SHARED_POINTERS
 
@@ -43,19 +43,13 @@
 
 
 namespace TNL {
+namespace Pointers {
 
-template< typename Object,
-          typename Device = typename Object::DeviceType >
-class SharedPointer
-{
-   static_assert( ! std::is_same< Device, void >::value, "The device cannot be void. You need to specify the device explicitly in your code." );
-};
+//#define HAVE_CUDA_UNIFIED_MEMORY
 
-/****
- * Specialization for Devices::Host
- */
+#ifdef HAVE_CUDA_UNIFIED_MEMORY
 template< typename Object >
-class SharedPointer< Object, Devices::Host > : public SmartPointer
+class SharedPointer< Object, Devices::Cuda > : public SmartPointer
 {
    private:
       // Convenient template alias for controlling the selection of copy- and
@@ -72,9 +66,13 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
 
    public:
 
-      typedef Object ObjectType;
-      typedef Devices::Host DeviceType;
-      typedef SharedPointer< Object, Devices::Host > ThisType;
+      using ObjectType = Object;
+      using DeviceType = Devices::Cuda; 
+      using ThisType = SharedPointer<  Object, Devices::Cuda >;
+
+      SharedPointer( std::nullptr_t )
+      : pd( nullptr )
+      {}
 
       template< typename... Args >
       explicit  SharedPointer( Args... args )
@@ -96,7 +94,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       // conditional constructor for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      SharedPointer( const SharedPointer< Object_, DeviceType >& pointer )
+      SharedPointer( const SharedPointer<  Object_, DeviceType >& pointer )
       : pd( (PointerData*) pointer.pd )
       {
          this->pd->counter += 1;
@@ -112,7 +110,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       // conditional constructor for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      SharedPointer( SharedPointer< Object_, DeviceType >&& pointer )
+      SharedPointer( SharedPointer<  Object_, DeviceType >&& pointer )
       : pd( (PointerData*) pointer.pd )
       {
          pointer.pd = nullptr;
@@ -145,21 +143,25 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
 
       const Object* operator->() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return &this->pd->data;
       }
 
       Object* operator->()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return &this->pd->data;
       }
 
       const Object& operator *() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
       }
 
       Object& operator *()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
       }
 
@@ -179,13 +181,15 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       __cuda_callable__
       const Object& getData() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
-      }      
-      
+      }
+
       template< typename Device = Devices::Host >
       __cuda_callable__
       Object& modifyData()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
       }
 
@@ -194,18 +198,20 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       {
          this->free();
          this->pd = (PointerData*) ptr.pd;
-         this->pd->counter += 1;
+         if( this->pd != nullptr ) 
+            this->pd->counter += 1;
          return *this;
       }
 
       // conditional operator for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      const ThisType& operator=( const SharedPointer< Object_, DeviceType >& ptr )
+      const ThisType& operator=( const SharedPointer<  Object_, DeviceType >& ptr )
       {
          this->free();
          this->pd = (PointerData*) ptr.pd;
-         this->pd->counter += 1;
+         if( this->pd != nullptr )
+            this->pd->counter += 1;
          return *this;
       }
 
@@ -221,7 +227,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       // conditional operator for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      const ThisType& operator=( SharedPointer< Object_, DeviceType >&& ptr )
+      const ThisType& operator=( SharedPointer<  Object_, DeviceType >&& ptr )
       {
          this->free();
          this->pd = (PointerData*) ptr.pd;
@@ -233,12 +239,12 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       {
          return true;
       }
-      
+
       void clear()
       {
          this->free();
       }
-      
+
       void swap( ThisType& ptr2 )
       {
          std::swap( this->pd, ptr2.pd );
@@ -267,8 +273,14 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
       template< typename... Args >
       bool allocate( Args... args )
       {
-         this->pd = new PointerData( args... );
-         return this->pd;
+#ifdef HAVE_CUDA
+         if( cudaMallocManaged( ( void** ) &this->pd, sizeof( PointerData ) ) != cudaSuccess ) // size is sizeof( PointerData ); the error check applies to the returned status
+            return false;
+         new ( this->pd ) PointerData( args... ); // construct in place inside the managed buffer
+         return true;
+#else
+         return false; // without CUDA support nothing can be allocated
+#endif
       }
 
       void free()
@@ -277,19 +289,19 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
          {
             if( ! --this->pd->counter )
             {
-               delete this->pd;
+#ifdef HAVE_CUDA
+               this->pd->~PointerData(); cudaFree( this->pd ); // built by placement new: destroy explicitly, then release the buffer
+#endif
                this->pd = nullptr;
             }
          }
-
       }
 
       PointerData* pd;
 };
 
-/****
- * Specialization for CUDA
- */
+#else // HAVE_CUDA_UNIFIED_MEMORY
+
 template< typename Object >
 class SharedPointer< Object, Devices::Cuda > : public SmartPointer
 {
@@ -308,9 +320,14 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
 
    public:
 
-      typedef Object ObjectType;
-      typedef Devices::Cuda DeviceType;
-      typedef SharedPointer< Object, Devices::Cuda > ThisType;
+      using ObjectType = Object;
+      using DeviceType = Devices::Cuda; 
+      using ThisType = SharedPointer<  Object, Devices::Cuda >;
+
+      SharedPointer( std::nullptr_t )
+      : pd( nullptr ),
+        cuda_pointer( nullptr )
+      {}
 
       template< typename... Args >
       explicit  SharedPointer( Args... args )
@@ -331,7 +348,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       // conditional constructor for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      SharedPointer( const SharedPointer< Object_, DeviceType >& pointer )
+      SharedPointer( const SharedPointer<  Object_, DeviceType >& pointer )
       : pd( (PointerData*) pointer.pd ),
         cuda_pointer( pointer.cuda_pointer )
       {
@@ -350,7 +367,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       // conditional constructor for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      SharedPointer( SharedPointer< Object_, DeviceType >&& pointer )
+      SharedPointer( SharedPointer<  Object_, DeviceType >&& pointer )
       : pd( (PointerData*) pointer.pd ),
         cuda_pointer( pointer.cuda_pointer )
       {
@@ -389,22 +406,26 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
 
       const Object* operator->() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return &this->pd->data;
       }
 
       Object* operator->()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          this->pd->maybe_modified = true;
          return &this->pd->data;
       }
 
       const Object& operator *() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
       }
 
       Object& operator *()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          this->pd->maybe_modified = true;
          return this->pd->data;
       }
@@ -426,8 +447,8 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       const Object& getData() const
       {
          static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->cuda_pointer, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" );
          if( std::is_same< Device, Devices::Host >::value )
             return this->pd->data;
          if( std::is_same< Device, Devices::Cuda >::value )
@@ -439,8 +460,8 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       Object& modifyData()
       {
          static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->cuda_pointer, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" );
          if( std::is_same< Device, Devices::Host >::value )
          {
             this->pd->maybe_modified = true;
@@ -456,7 +477,8 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          this->free();
          this->pd = (PointerData*) ptr.pd;
          this->cuda_pointer = ptr.cuda_pointer;
-         this->pd->counter += 1;
+         if( this->pd != nullptr )
+            this->pd->counter += 1;
 #ifdef TNL_DEBUG_SHARED_POINTERS
          std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
 #endif
@@ -466,12 +488,13 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       // conditional operator for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      const ThisType& operator=( const SharedPointer< Object_, DeviceType >& ptr )
+      const ThisType& operator=( const SharedPointer<  Object_, DeviceType >& ptr )
       {
          this->free();
          this->pd = (PointerData*) ptr.pd;
          this->cuda_pointer = ptr.cuda_pointer;
-         this->pd->counter += 1;
+         if( this->pd != nullptr )
+            this->pd->counter += 1;
 #ifdef TNL_DEBUG_SHARED_POINTERS
          std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
 #endif
@@ -495,7 +518,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       // conditional operator for non-const -> const data
       template< typename Object_,
                 typename = typename Enabler< Object_ >::type >
-      const ThisType& operator=( SharedPointer< Object_, DeviceType >&& ptr )
+      const ThisType& operator=( SharedPointer<  Object_, DeviceType >&& ptr )
       {
          this->free();
          this->pd = (PointerData*) ptr.pd;
@@ -532,12 +555,12 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          return false;
 #endif
       }
-      
+
       void clear()
       {
          this->free();
       }
-      
+
       void swap( ThisType& ptr2 )
       {
          std::swap( this->pd, ptr2.pd );
@@ -584,14 +607,14 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
 
       void set_last_sync_state()
       {
-         TNL_ASSERT( this->pd, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) );
          this->pd->maybe_modified = false;
       }
 
       bool modified()
       {
-         TNL_ASSERT( this->pd, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          // optimization: skip bitwise comparison if we're sure that the data is the same
          if( ! this->pd->maybe_modified )
             return false;
@@ -624,362 +647,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       // unable to dereference this-pd on the device
       Object* cuda_pointer;
 };
+#endif // HAVE_CUDA_UNIFIED_MEMORY
 
-#ifdef HAVE_MIC
-/****
- * Specialization for MIC
- */
-template< typename Object>
-class SharedPointer< Object, Devices::MIC > : public SmartPointer
-{
-   private:
-      // Convenient template alias for controlling the selection of copy- and
-      // move-constructors and assignment operators using SFINAE.
-      // The type Object_ is "enabled" iff Object_ and Object are not the same,
-      // but after removing const and volatile qualifiers they are the same.
-      template< typename Object_ >
-      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
-                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
-
-      // friend class will be needed for templated assignment operators
-      template< typename Object_, typename Device_>
-      friend class SharedPointer;
-
-   public:
-
-      typedef Object ObjectType;
-      typedef Devices::MIC DeviceType;
-      typedef SharedPointer< Object, Devices::MIC> ThisType;
-
-      template< typename... Args >
-      explicit  SharedPointer( Args... args )
-      : pd( nullptr ),
-        mic_pointer( nullptr )
-      {
-            this->allocate( args... );
-      }
-
-      // this is needed only to avoid the default compiler-generated constructor
-      SharedPointer( const ThisType& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         this->pd->counter += 1;
-      }
-
-      // conditional constructor for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      SharedPointer( const SharedPointer< Object_, DeviceType >& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         this->pd->counter += 1;
-      }
-
-      // this is needed only to avoid the default compiler-generated constructor
-      SharedPointer( ThisType&& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         pointer.pd = nullptr;
-         pointer.mic_pointer = nullptr;
-      }
-
-      // conditional constructor for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      SharedPointer( SharedPointer< Object_, DeviceType >&& pointer )
-      : pd( (PointerData*) pointer.pd ),
-        mic_pointer( pointer.mic_pointer )
-      {
-         pointer.pd = nullptr;
-         pointer.mic_pointer = nullptr;
-      }
-
-      template< typename... Args >
-      bool recreate( Args... args )
-      {
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         if( ! this->pd )
-            return this->allocate( args... );
-
-         if( this->pd->counter == 1 )
-         {
-            /****
-             * The object is not shared -> recreate it in-place, without reallocation
-             */
-            this->pd->data.~Object();
-            new ( &this->pd->data ) Object( args... );
-            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
-            this->set_last_sync_state();
-            return true;
-         }
-
-         // free will just decrement the counter
-         this->free();
-
-         return this->allocate( args... );
-      }
-
-      const Object* operator->() const
-      {
-         return &this->pd->data;
-      }
-
-      Object* operator->()
-      {
-         this->pd->maybe_modified = true;
-         return &this->pd->data;
-      }
-
-      const Object& operator *() const
-      {
-         return this->pd->data;
-      }
-
-      Object& operator *()
-      {
-         this->pd->maybe_modified = true;
-         return this->pd->data;
-      }
-
-      operator bool()
-      {
-         return this->pd;
-      }
-
-      template< typename Device = Devices::Host >
-      __cuda_callable__
-      const Object& getData() const
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->mic_pointer, );
-         if( std::is_same< Device, Devices::Host >::value )
-            return this->pd->data;
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-
-      }
-
-      template< typename Device = Devices::Host >
-      __cuda_callable__
-      Object& modifyData()
-      {
-         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->mic_pointer, );
-         if( std::is_same< Device, Devices::Host >::value )
-         {
-            this->pd->maybe_modified = true;
-            return this->pd->data;
-         }
-         if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );
-
-      }
-
-      // this is needed only to avoid the default compiler-generated operator
-      const ThisType& operator=( const ThisType& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         this->pd->counter += 1;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      // conditional operator for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      const ThisType& operator=( const SharedPointer< Object_, DeviceType >& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         this->pd->counter += 1;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      // this is needed only to avoid the default compiler-generated operator
-      const ThisType& operator=( ThisType&& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      // conditional operator for non-const -> const data
-      template< typename Object_,
-                typename = typename Enabler< Object_ >::type >
-      const ThisType& operator=( SharedPointer< Object_, DeviceType >&& ptr )
-      {
-         this->free();
-         this->pd = (PointerData*) ptr.pd;
-         this->mic_pointer = ptr.mic_pointer;
-         ptr.pd = nullptr;
-         ptr.mic_pointer = nullptr;
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-         return *this;
-      }
-
-      bool synchronize()
-      {
-         if( ! this->pd )
-            return true;
-
-         if( this->modified() )
-         {
-#ifdef TNL_DEBUG_SHARED_POINTERS
-            std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl;
-            std::cerr << "   ( " << sizeof( Object ) << " bytes, MIC adress " << this->mic_pointer << " )" << std::endl;
-#endif
-            TNL_ASSERT( this->mic_pointer, );
-            
-            Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));    
-            this->set_last_sync_state();
-            return true;
-         }
-         return false; //??
-      }
-      
-      void clear()
-      {
-         this->free();
-      }
-
-      void swap( ThisType& ptr2 )
-      {
-         std::swap( this->pd, ptr2.pd );
-         std::swap( this->mic_pointer, ptr2.mic_pointer );
-      }
-
-      ~SharedPointer()
-      {
-         this->free();
-         Devices::MIC::removeSmartPointer( this );
-      }
-
-   protected:
-
-      struct PointerData
-      {
-         Object data;
-         uint8_t data_image[ sizeof(Object) ];
-         int counter;
-         bool maybe_modified;
-
-         template< typename... Args >
-         explicit PointerData( Args... args )
-         : data( args... ),
-           counter( 1 ),
-           maybe_modified( false )
-         {}
-      };
-
-      template< typename... Args >
-      bool allocate( Args... args )
-      {
-         this->pd = new PointerData( args... );
-         if( ! this->pd )
-            return false;
-         
-         mic_pointer=(Object*)Devices::MIC::AllocMIC(sizeof(Object));
-         Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
-         
-         if( ! this->mic_pointer )
-            return false;
-         // set last-sync state
-         this->set_last_sync_state();
-#ifdef TNL_DEBUG_SHARED_POINTERS
-         std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (mic_pointer = " << this->mic_pointer << ")" << std::endl;
-#endif
-         Devices::MIC::insertSmartPointer( this );
-         return true;
-      }
-
-      void set_last_sync_state()
-      {
-         TNL_ASSERT( this->pd, );
-         std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) );
-         this->pd->maybe_modified = false;
-      }
-
-      bool modified()
-      {
-         TNL_ASSERT( this->pd, );
-         // optimization: skip bitwise comparison if we're sure that the data is the same
-         if( ! this->pd->maybe_modified )
-            return false;
-         return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0;
-      }
-
-      void free()
-      {
-         if( this->pd )
-         {
-#ifdef TNL_DEBUG_SHARED_POINTERS
-            std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", mic_pointer = " << this->mic_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
-#endif
-            if( ! --this->pd->counter )
-            {
-               delete this->pd;
-               this->pd = nullptr;
-               if( this->mic_pointer )
-               {
-                   Devices::MIC::FreeMIC((void*)mic_pointer);
-                   mic_pointer=nullptr;
-               }
-#ifdef TNL_DEBUG_SHARED_POINTERS
-               std::cerr << "...deleted data." << std::endl;
-#endif
-            }
-         }
-      }
-
-      PointerData* pd;
-
-      // cuda_pointer can't be part of PointerData structure, since we would be
-      // unable to dereference this-pd on the device -- Nevím zda to platí pro MIC, asi jo
-      Object* mic_pointer;
-};
-#endif
-
-
-#if  (!defined(NDEBUG)) && (!defined(HAVE_MIC)) 
-namespace Assert {
-
-template< typename Object, typename Device >
-struct Formatter< SharedPointer< Object, Device > >
-{
-   static std::string
-   printToString( const SharedPointer< Object, Device >& value )
-   {
-      ::std::stringstream ss;
-      ss << "(SharedPointer< " << Object::getType() << ", " << Device::getDeviceType()
-         << " > object at " << &value << ")";
-      return ss.str();
-   }
-};
-
-} // namespace Assert
-#endif
-
+} // namespace Pointers
 } // namespace TNL
diff --git a/src/TNL/Pointers/SharedPointerHost.h b/src/TNL/Pointers/SharedPointerHost.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6c45cf710e6901c9a5f23ec4b02de295d48d62f
--- /dev/null
+++ b/src/TNL/Pointers/SharedPointerHost.h
@@ -0,0 +1,273 @@
+/***************************************************************************
+                          SharedPointerHost.h  -  description
+                             -------------------
+    begin                : Aug 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber, Jakub Klinkovsky
+
+#pragma once
+
+#include "SharedPointer.h"
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/CudaCallable.h>
+#include <TNL/Pointers/SmartPointer.h>
+
+#include <cstddef>   // std::nullptr_t
+#include <algorithm> // swap
+
+namespace TNL {
+namespace Pointers {
+
+template< typename Object >
+class SharedPointer< Object, Devices::Host > : public SmartPointer
+{
+   private:
+      // Convenient template alias for controlling the selection of copy- and
+      // move-constructors and assignment operators using SFINAE.
+      // The type Object_ is "enabled" iff Object_ and Object are not the same,
+      // but after removing const and volatile qualifiers they are the same.
+      template< typename Object_ >
+      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
+                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
+
+      // friend class will be needed for templated assignment operators
+      template< typename Object_, typename Device_ >
+      friend class SharedPointer;
+
+   public:
+
+      using ObjectType = Object;
+      using DeviceType = Devices::Host; 
+      using ThisType = SharedPointer<  Object, Devices::Host >;
+
+      SharedPointer( std::nullptr_t )
+      : pd( nullptr )
+      {}
+
+      template< typename... Args >
+      explicit  SharedPointer( Args... args )
+      : pd( nullptr )
+      {
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         this->allocate( args... );
+      }
+
+      // user-provided copy constructor (avoids the compiler-generated one): shares ownership with `pointer`
+      SharedPointer( const ThisType& pointer )
+      : pd( (PointerData*) pointer.pd )
+      {
+         if( this->pd != nullptr ) this->pd->counter += 1; // a null source has no counter to bump
+      }
+
+      // conditional constructor for non-const -> const data; shares ownership with `pointer`
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      SharedPointer( const SharedPointer<  Object_, DeviceType >& pointer )
+      : pd( (PointerData*) pointer.pd )
+      {
+         if( this->pd != nullptr ) this->pd->counter += 1; // a null source has no counter to bump
+      }
+
+      // this is needed only to avoid the default compiler-generated constructor
+      SharedPointer( ThisType&& pointer )
+      : pd( (PointerData*) pointer.pd )
+      {
+         pointer.pd = nullptr;
+      }
+
+      // conditional constructor for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      SharedPointer( SharedPointer<  Object_, DeviceType >&& pointer )
+      : pd( (PointerData*) pointer.pd )
+      {
+         pointer.pd = nullptr;
+      }
+
+      template< typename... Args >
+      bool recreate( Args... args )
+      {
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         // null pointer (no PointerData yet) -> simply allocate a fresh object
+         if( ! this->pd )
+            return this->allocate( args... );
+         if( this->pd->counter == 1 )
+         {
+            /****
+             * The object is not shared -> recreate it in-place, without reallocation
+             */
+            this->pd->data.~ObjectType();
+            // placement new needs the address of the storage, not the object itself
+            new ( &this->pd->data ) ObjectType( args... );
+            return true;
+         }
+         // shared with other pointers: give up our share (free() only decrements
+         // the counter here) and allocate a new, unshared object
+         this->free();
+         return this->allocate( args... );
+      }
+
+      const Object* operator->() const
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return &this->pd->data;
+      }
+
+      Object* operator->()
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return &this->pd->data;
+      }
+
+      const Object& operator *() const
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return this->pd->data;
+      }
+
+      Object& operator *()
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return this->pd->data;
+      }
+
+      __cuda_callable__
+      operator bool() const
+      {
+         return this->pd;
+      }
+
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pd;
+      }
+
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      const Object& getData() const
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return this->pd->data;
+      }
+
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      Object& modifyData()
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return this->pd->data;
+      }
+
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( const ThisType& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         if( this->pd != nullptr )
+            this->pd->counter += 1;
+         return *this;
+      }
+
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( const SharedPointer<  Object_, DeviceType >& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         if( this->pd != nullptr )
+            this->pd->counter += 1;
+         return *this;
+      }
+
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( ThisType&& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         ptr.pd = nullptr;
+         return *this;
+      }
+
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( SharedPointer<  Object_, DeviceType >&& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         ptr.pd = nullptr;
+         return *this;
+      }
+
+      bool synchronize()
+      {
+         return true;
+      }
+
+      void clear()
+      {
+         this->free();
+      }
+
+      void swap( ThisType& ptr2 )
+      {
+         std::swap( this->pd, ptr2.pd );
+      }
+      
+      ~SharedPointer()
+      {
+         this->free();
+      }
+
+
+   protected:
+
+      struct PointerData
+      {
+         Object data;
+         int counter;
+
+         template< typename... Args >
+         explicit PointerData( Args... args )
+         : data( args... ),
+           counter( 1 )
+         {}
+      };
+
+      template< typename... Args >
+      bool allocate( Args... args )
+      {
+         this->pd = new PointerData( args... );
+         return this->pd;
+      }
+
+      // releases this pointer's share; the last owner deletes the data
+      void free()
+      {
+         if( this->pd )
+         {
+            if( ! --this->pd->counter )
+               delete this->pd;
+            // always detach: otherwise clear() followed by the destructor
+            // would decrement the shared counter twice
+            this->pd = nullptr;
+         }
+      }
+
+      PointerData* pd;
+};
+
+} // namespace Pointers
+} // namespace TNL
diff --git a/src/TNL/Pointers/SharedPointerMic.h b/src/TNL/Pointers/SharedPointerMic.h
new file mode 100644
index 0000000000000000000000000000000000000000..3acea10bc8eb14a8753fce3a598e62011a06edb7
--- /dev/null
+++ b/src/TNL/Pointers/SharedPointerMic.h
@@ -0,0 +1,373 @@
+/***************************************************************************
+                          SharedPointerMic.h  -  description
+                             -------------------
+    begin                : Aug 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber, Jakub Klinkovsky
+
+#pragma once
+
+#include "SharedPointer.h"
+
+#include <TNL/Devices/MIC.h>
+#include <TNL/Pointers/SmartPointer.h>
+
+#include <cstring>   // std::memcpy, std::memcmp
+#include <cstddef>   // std::nullptr_t
+#include <algorithm> // swap
+
+namespace TNL {
+namespace Pointers {
+
+#ifdef HAVE_MIC
+template< typename Object>
+class SharedPointer< Object, Devices::MIC > : public SmartPointer
+{
+   private:
+      // Convenient template alias for controlling the selection of copy- and
+      // move-constructors and assignment operators using SFINAE.
+      // The type Object_ is "enabled" iff Object_ and Object are not the same,
+      // but after removing const and volatile qualifiers they are the same.
+      template< typename Object_ >
+      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
+                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
+
+      // friend class will be needed for templated assignment operators
+      template< typename Object_, typename Device_>
+      friend class SharedPointer;
+
+   public:
+
+      using ObjectType = Object;
+      using DeviceType = Devices::MIC; 
+      using ThisType = SharedPointer<  Object, Devices::MIC >;
+
+      SharedPointer( std::nullptr_t )   // creates a null pointer: allocate() is not called
+      : pd( nullptr ),
+        mic_pointer( nullptr )
+      {}
+
+      template< typename... Args >
+      explicit  SharedPointer( Args... args )   // args are passed on to allocate()
+      : pd( nullptr ),
+        mic_pointer( nullptr )
+      {
+            this->allocate( args... );   // NOTE: the bool result of allocate() is not checked
+      }
+
+      // this is needed only to avoid the default compiler-generated constructor
+      SharedPointer( const ThisType& pointer )
+      : pd( (PointerData*) pointer.pd ), mic_pointer( pointer.mic_pointer )
+      {
+         if( this->pd != nullptr )   // copying a null pointer shares nothing
+            this->pd->counter += 1;
+      }
+
+      // conditional constructor for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      SharedPointer( const SharedPointer< Object_, DeviceType >& pointer )
+      : pd( (PointerData*) pointer.pd ), mic_pointer( pointer.mic_pointer )
+      {
+         if( this->pd != nullptr )   // copying a null pointer shares nothing
+            this->pd->counter += 1;
+      }
+
+      // Move constructor; user-defined only to avoid the default compiler-generated constructor.
+      SharedPointer( ThisType&& pointer )
+      : pd( (PointerData*) pointer.pd ),     // take over the reference; the counter is not touched
+        mic_pointer( pointer.mic_pointer )
+      {
+         pointer.pd = nullptr;               // the source is left null on both sides
+         pointer.mic_pointer = nullptr;
+      }
+
+      // conditional move constructor for non-const -> const data (selected via Enabler< Object_ >)
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      SharedPointer( SharedPointer< Object_, DeviceType >&& pointer )
+      : pd( (PointerData*) pointer.pd ),     // take over the reference; the counter is not touched
+        mic_pointer( pointer.mic_pointer )
+      {
+         pointer.pd = nullptr;               // the source is left null on both sides
+         pointer.mic_pointer = nullptr;
+      }
+
+      template< typename... Args >
+      bool recreate( Args... args )   // replaces the pointed-to object by Object( args... )
+      {
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         if( ! this->pd )
+            return this->allocate( args... );   // null pointer: plain allocation
+
+         if( this->pd->counter == 1 )
+         {
+            /****
+             * The object is not shared -> recreate it in-place, without reallocation
+             */
+            this->pd->data.~Object();
+            new ( &this->pd->data ) Object( args... );
+            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
+            this->set_last_sync_state();   // host data and MIC copy are identical again
+            return true;
+         }
+
+         // the data is shared (counter != 1): drop our reference and allocate a new object
+         this->free();
+
+         return this->allocate( args... );
+      }
+
+      const Object* operator->() const   // read-only access to pd->data; maybe_modified is not touched
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return &this->pd->data;
+      }
+
+      Object* operator->()   // mutable access to pd->data
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         this->pd->maybe_modified = true;   // the caller may write through the returned pointer
+         return &this->pd->data;
+      }
+
+      const Object& operator *() const   // read-only access to pd->data; maybe_modified is not touched
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         return this->pd->data;
+      }
+
+      Object& operator *()   // mutable access to pd->data
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         this->pd->maybe_modified = true;   // the caller may write through the returned reference
+         return this->pd->data;
+      }
+
+      operator bool() const   // true iff data is attached; const so that const pointers can be tested too
+      {
+         return this->pd;
+      }
+
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      const Object& getData() const   // Device selects pd->data (Host) or *mic_pointer (MIC)
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
+         if( std::is_same< Device, Devices::Host >::value )
+            return this->pd->data;
+         // the static_assert above leaves Devices::MIC as the only alternative;
+         // returning unconditionally keeps control from flowing off the end
+         return *( this->mic_pointer );
+      }
+
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      Object& modifyData()   // Device selects pd->data (Host) or *mic_pointer (MIC)
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
+         if( std::is_same< Device, Devices::Host >::value )
+         {
+            this->pd->maybe_modified = true;   // the caller may write to the host copy
+            return this->pd->data;
+         }
+         // the static_assert above leaves Devices::MIC as the only alternative;
+         // returning unconditionally keeps control from flowing off the end
+         return *( this->mic_pointer );
+      }
+
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( const ThisType& ptr )
+      {
+         if( this->pd == ptr.pd ) return *this;   // self-assignment or same data: free() could destroy it
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         if( this->pd != nullptr ) this->pd->counter += 1;
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Copy-assigned shared pointer: counter = " << ( this->pd ? this->pd->counter : 0 ) << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         return *this;
+      }
+
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( const SharedPointer< Object_, DeviceType >& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         if( this->pd != nullptr )   // a null ptr shares nothing
+            this->pd->counter += 1;
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Copy-assigned shared pointer: counter = " << ( this->pd ? this->pd->counter : 0 ) << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         return *this;
+      }
+
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( ThisType&& ptr )
+      {
+         this->free();                       // give up the reference held so far
+         this->pd = (PointerData*) ptr.pd;   // take over ptr's reference; the counter is not touched
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Move-assigned shared pointer: counter = " << ( this->pd ? this->pd->counter : 0 ) << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         return *this;
+      }
+
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( SharedPointer< Object_, DeviceType >&& ptr )
+      {
+         this->free();                       // give up the reference held so far
+         this->pd = (PointerData*) ptr.pd;   // take over ptr's reference; the counter is not touched
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Move-assigned shared pointer: counter = " << ( this->pd ? this->pd->counter : 0 ) << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+         return *this;
+      }
+
+      bool synchronize()   // copies the host object to the MIC if it differs from the last synchronized image
+      {
+         if( ! this->pd )
+            return true;
+
+         if( this->modified() )
+         {
+#ifdef TNL_DEBUG_SHARED_POINTERS
+            std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl;
+            std::cerr << "   ( " << sizeof( Object ) << " bytes, MIC adress " << this->mic_pointer << " )" << std::endl;
+#endif
+            TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
+
+            Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
+            this->set_last_sync_state();
+            return true;
+         }
+         return true;   // nothing to copy is not a failure (same as UniquePointer< Object, Devices::MIC >)
+      }
+
+      void clear()   // gives up this pointer's reference by delegating to free()
+      {
+         this->free();
+      }
+
+      void swap( ThisType& ptr2 )   // exchanges host and MIC pointers; reference counters are not touched
+      {
+         std::swap( this->pd, ptr2.pd );
+         std::swap( this->mic_pointer, ptr2.mic_pointer );
+      }
+
+      ~SharedPointer()
+      {
+         this->free();                               // give up this pointer's reference
+         Devices::MIC::removeSmartPointer( this );   // counterpart of insertSmartPointer( this ) in allocate()
+      }
+
+   protected:
+
+      struct PointerData
+      {
+         Object data;                            // host copy of the object
+         uint8_t data_image[ sizeof(Object) ];   // bytes of data as of the last set_last_sync_state()
+         int counter;                            // reference count; free() decrements it
+         bool maybe_modified;                    // set by the non-const accessors, cleared by set_last_sync_state()
+         // forwards args (taken by value) to Object's constructor; data_image is not initialized here
+         template< typename... Args >
+         explicit PointerData( Args... args )
+         : data( args... ),
+           counter( 1 ),
+           maybe_modified( false )
+         {}
+      };
+
+      // Creates the host object from args, allocates its MIC buffer and uploads the first copy.
+      template< typename... Args >
+      bool allocate( Args... args )
+      {
+         this->pd = new PointerData( args... );
+         if( ! this->pd )
+            return false;
+         mic_pointer=(Object*)Devices::MIC::AllocMIC(sizeof(Object));
+         // check the allocation before it is used as a copy destination
+         if( ! this->mic_pointer )
+            return false;
+         Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
+         // set last-sync state
+         this->set_last_sync_state();
+#ifdef TNL_DEBUG_SHARED_POINTERS
+         std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (mic_pointer = " << this->mic_pointer << ")" << std::endl;
+#endif
+         Devices::MIC::insertSmartPointer( this );
+         return true;
+      }
+
+      void set_last_sync_state()   // snapshots data into data_image and clears the maybe_modified flag
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) );
+         this->pd->maybe_modified = false;
+      }
+
+      bool modified()
+      {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         // the flag short-circuits: the bitwise comparison with the last synchronized
+         // image runs only after a non-const accessor has set maybe_modified
+         return this->pd->maybe_modified &&
+                std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0;
+      }
+
+      void free()   // releases this pointer's reference; the last owner deletes the host data and the MIC buffer
+      {
+         if( this->pd )
+         {
+#ifdef TNL_DEBUG_SHARED_POINTERS
+            std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", mic_pointer = " << this->mic_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+#endif
+            if( ! --this->pd->counter )
+            {
+               delete this->pd;
+               if( this->mic_pointer )
+                   Devices::MIC::FreeMIC((void*)mic_pointer);
+#ifdef TNL_DEBUG_SHARED_POINTERS
+               std::cerr << "...deleted data." << std::endl;
+#endif
+            }
+            // always detach: a second free() (e.g. clear() followed by the
+            // destructor) must not decrement the shared counter once more
+            this->pd = nullptr;
+            mic_pointer = nullptr;
+         }
+      }
+
+      PointerData* pd;   // shared block: object, its last synchronized image, reference counter
+
+      // mic_pointer can't be part of PointerData structure, since we would be
+      // unable to dereference this->pd on the device -- not sure whether this holds for MIC, probably yes
+      Object* mic_pointer;
+};
+#endif
+
+} // namespace Pointers
+} // namespace TNL
diff --git a/src/TNL/Pointers/SmartPointer.h b/src/TNL/Pointers/SmartPointer.h
new file mode 100644
index 0000000000000000000000000000000000000000..dce0f41dfbee7601421deb055fcc026fdfb7e77e
--- /dev/null
+++ b/src/TNL/Pointers/SmartPointer.h
@@ -0,0 +1,19 @@
+/***************************************************************************
+                          SmartPointer.h  -  description
+                             -------------------
+    begin                : May 30, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+class SmartPointer
+{
+   public:
+      // Common interface of all smart pointers; synchronize() returns a success flag.
+      virtual bool synchronize() = 0;
+      virtual ~SmartPointer() = default;   // polymorphic base: destruction through SmartPointer* must be safe
+};
diff --git a/src/TNL/SmartPointersRegister.cpp b/src/TNL/Pointers/SmartPointersRegister.cpp
similarity index 97%
rename from src/TNL/SmartPointersRegister.cpp
rename to src/TNL/Pointers/SmartPointersRegister.cpp
index f2910ba36ecfde3fdf0f7c57cfcef61fa3c65a0e..cd57dfe3439b0846f65f0bf8bfaf573cfcbd6e91 100644
--- a/src/TNL/SmartPointersRegister.cpp
+++ b/src/TNL/Pointers/SmartPointersRegister.cpp
@@ -16,7 +16,7 @@
  ***************************************************************************/
 
 #include <iostream>
-#include <TNL/SmartPointersRegister.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
 #include <TNL/Devices/Cuda.h>
 
 void SmartPointersRegister::insert( SmartPointer* pointer, int deviceId )
diff --git a/src/TNL/SmartPointersRegister.h b/src/TNL/Pointers/SmartPointersRegister.h
similarity index 51%
rename from src/TNL/SmartPointersRegister.h
rename to src/TNL/Pointers/SmartPointersRegister.h
index a2e9c43639501cf0d287e531cc65842c85b36c59..569d62138137459fa52137a7b2658b4216b61c3b 100644
--- a/src/TNL/SmartPointersRegister.h
+++ b/src/TNL/Pointers/SmartPointersRegister.h
@@ -1,12 +1,3 @@
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
 /***************************************************************************
                           SmartPointersRegister.h  -  description
                              -------------------
@@ -15,28 +6,29 @@
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
+/* See Copyright Notice in tnl/Copyright */
+
 #pragma once
 
 #include <unordered_set>
 #include <unordered_map>
-#include <TNL/SmartPointer.h>
+#include <TNL/Pointers/SmartPointer.h>
 #include <TNL/Assert.h>
 
 class SmartPointersRegister
-{   
-  
+{
+
    public:
-   
+
       void insert( SmartPointer* pointer, int deviceId );
-      
+
       void remove( SmartPointer* pointer, int deviceId );
-      
+
       bool synchronizeDevice( int deviceId );
-      
+
    protected:
-      
-      typedef std::unordered_set< SmartPointer* > SetType;   
-      
+
+      typedef std::unordered_set< SmartPointer* > SetType;
+
       std::unordered_map< int, SetType > pointersOnDevices;
 };
-
diff --git a/src/TNL/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h
similarity index 77%
rename from src/TNL/UniquePointer.h
rename to src/TNL/Pointers/UniquePointer.h
index 606f86cc4326c57cd73f7efa525dbcc4b344306b..93a667c3553e65fc335c9a87e244d6e37dac536c 100644
--- a/src/TNL/UniquePointer.h
+++ b/src/TNL/Pointers/UniquePointer.h
@@ -14,55 +14,63 @@
 
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/SmartPointer.h>
+#include <TNL/Devices/MIC.h>
+#include <TNL/Pointers/SmartPointer.h>
 
-#include <cstring>
+#include <cstring>  // std::memcpy, std::memcmp
+#include <cstddef>  // std::nullptr_t
 
-#include "Devices/MIC.h"
-
-
-namespace TNL { 
+namespace TNL {
+namespace Pointers {
 
 template< typename Object, typename Device = typename Object::DeviceType >
 class UniquePointer
-{  
+{
 };
 
 template< typename Object >
 class UniquePointer< Object, Devices::Host > : public SmartPointer
 {
    public:
-      
+
       typedef Object ObjectType;
       typedef Devices::Host DeviceType;
       typedef UniquePointer< Object, Devices::Host > ThisType;
-         
+
+      UniquePointer( std::nullptr_t )
+      : pointer( nullptr )
+      {}
+
       template< typename... Args >
-      UniquePointer( const Args... args )
+      explicit  UniquePointer( const Args... args )
       {
          this->pointer = new Object( args... );
       }
-      
+
       const Object* operator->() const
       {
+         TNL_ASSERT_TRUE( this->pointer, "Attempt to dereference a null pointer" );
          return this->pointer;
       }
-      
+
       Object* operator->()
       {
+         TNL_ASSERT_TRUE( this->pointer, "Attempt to dereference a null pointer" );
          return this->pointer;
       }
-      
+
       const Object& operator *() const
       {
+         TNL_ASSERT_TRUE( this->pointer, "Attempt to dereference a null pointer" );
          return *( this->pointer );
       }
-      
+
       Object& operator *()
       {
+         TNL_ASSERT_TRUE( this->pointer, "Attempt to dereference a null pointer" );
          return *( this->pointer );
       }
-      
+
       __cuda_callable__
       operator bool() const
       {
@@ -78,15 +86,17 @@ class UniquePointer< Object, Devices::Host > : public SmartPointer
       template< typename Device = Devices::Host >
       const Object& getData() const
       {
+         TNL_ASSERT_TRUE( this->pointer, "Attempt to dereference a null pointer" );
          return *( this->pointer );
       }
 
       template< typename Device = Devices::Host >
       Object& modifyData()
       {
+         TNL_ASSERT_TRUE( this->pointer, "Attempt to dereference a null pointer" );
          return *( this->pointer );
       }
-      
+
       const ThisType& operator=( ThisType& ptr )
       {
          if( this->pointer )
@@ -95,26 +105,26 @@ class UniquePointer< Object, Devices::Host > : public SmartPointer
          ptr.pointer = nullptr;
          return *this;
       }
-      
+
       const ThisType& operator=( ThisType&& ptr )
       {
-         return this->operator=( ptr );         
-      }      
-      
+         return this->operator=( ptr );
+      }
+
       bool synchronize()
       {
          return true;
       }
-      
+
       ~UniquePointer()
       {
          if( this->pointer )
             delete this->pointer;
       }
 
-      
+
    protected:
-      
+
       Object* pointer;
 };
 
@@ -122,11 +132,16 @@ template< typename Object >
 class UniquePointer< Object, Devices::Cuda > : public SmartPointer
 {
    public:
-      
+
       typedef Object ObjectType;
       typedef Devices::Cuda DeviceType;
       typedef UniquePointer< Object, Devices::Cuda > ThisType;
-         
+
+      UniquePointer( std::nullptr_t )
+      : pd( nullptr ),
+        cuda_pointer( nullptr )
+      {}
+
       template< typename... Args >
       explicit  UniquePointer( const Args... args )
       : pd( nullptr ),
@@ -134,29 +149,33 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
       {
          this->allocate( args... );
       }
-      
+
       const Object* operator->() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return &this->pd->data;
       }
-      
+
       Object* operator->()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          this->pd->maybe_modified = true;
          return &this->pd->data;
       }
-      
+
       const Object& operator *() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
       }
-      
+
       Object& operator *()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          this->pd->maybe_modified = true;
          return this->pd->data;
       }
-      
+
       __cuda_callable__
       operator bool() const
       {
@@ -169,24 +188,24 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          return ! this->pd;
       }
 
-      template< typename Device = Devices::Host >      
+      template< typename Device = Devices::Host >
       const Object& getData() const
       {
          static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->cuda_pointer, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" );
          if( std::is_same< Device, Devices::Host >::value )
             return this->pd->data;
          if( std::is_same< Device, Devices::Cuda >::value )
-            return *( this->cuda_pointer );            
+            return *( this->cuda_pointer );
       }
 
       template< typename Device = Devices::Host >
       Object& modifyData()
       {
          static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "Only Devices::Host or Devices::Cuda devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->cuda_pointer, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->cuda_pointer, "Attempt to dereference a null pointer" );
          if( std::is_same< Device, Devices::Host >::value )
          {
             this->pd->maybe_modified = true;
@@ -195,7 +214,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          if( std::is_same< Device, Devices::Cuda >::value )
             return *( this->cuda_pointer );
       }
-      
+
       const ThisType& operator=( ThisType& ptr )
       {
          this->free();
@@ -205,12 +224,12 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          ptr.cuda_pointer = nullptr;
          return *this;
       }
-      
+
       const ThisType& operator=( ThisType&& ptr )
       {
          return this->operator=( ptr );
-      }      
-      
+      }
+
       bool synchronize()
       {
          if( ! this->pd )
@@ -225,17 +244,17 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
             return true;
          }
          return true;
-#else         
+#else
          return false;
-#endif         
+#endif
       }
-            
+
       ~UniquePointer()
       {
          this->free();
          Devices::Cuda::removeSmartPointer( this );
       }
-      
+
    protected:
 
       struct PointerData
@@ -265,14 +284,14 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
 
       void set_last_sync_state()
       {
-         TNL_ASSERT( this->pd, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) );
          this->pd->maybe_modified = false;
       }
 
       bool modified()
       {
-         TNL_ASSERT( this->pd, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          // optimization: skip bitwise comparison if we're sure that the data is the same
          if( ! this->pd->maybe_modified )
             return false;
@@ -286,7 +305,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          if( this->cuda_pointer )
             Devices::Cuda::freeFromDevice( this->cuda_pointer );
       }
-      
+
       PointerData* pd;
 
       // cuda_pointer can't be part of PointerData structure, since we would be
@@ -299,64 +318,73 @@ template< typename Object >
 class UniquePointer< Object, Devices::MIC > : public SmartPointer
 {
    public:
-      
+
       typedef Object ObjectType;
       typedef Devices::MIC DeviceType;
       typedef UniquePointer< Object, Devices::MIC > ThisType;
-         
+
+      UniquePointer( std::nullptr_t )
+      : pd( nullptr ),
+        mic_pointer( nullptr )
+      {}
+
       template< typename... Args >
       explicit  UniquePointer( const Args... args )
       : pd( nullptr ),
-        cuda_pointer( nullptr )
+        mic_pointer( nullptr )
       {
          this->allocate( args... );
       }
-      
+
       const Object* operator->() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return &this->pd->data;
       }
-      
+
       Object* operator->()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          this->pd->maybe_modified = true;
          return &this->pd->data;
       }
-      
+
       const Object& operator *() const
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          return this->pd->data;
       }
-      
+
       Object& operator *()
       {
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          this->pd->maybe_modified = true;
          return this->pd->data;
       }
-      
+
       operator bool()
       {
          return this->pd;
       }
 
-      template< typename Device = Devices::Host >      
+      template< typename Device = Devices::Host >
       const Object& getData() const
       {
          static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->mic_pointer, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
          if( std::is_same< Device, Devices::Host >::value )
             return this->pd->data;
          if( std::is_same< Device, Devices::MIC >::value )
-            return *( this->mic_pointer );            
+            return *( this->mic_pointer );
       }
 
       template< typename Device = Devices::Host >
       Object& modifyData()
       {
          static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
-         TNL_ASSERT( this->pd, );
-         TNL_ASSERT( this->mic_pointer, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
+         TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" );
          if( std::is_same< Device, Devices::Host >::value )
          {
             this->pd->maybe_modified = true;
@@ -365,7 +393,7 @@ class UniquePointer< Object, Devices::MIC > : public SmartPointer
          if( std::is_same< Device, Devices::MIC >::value )
             return *( this->mic_pointer );
       }
-      
+
       const ThisType& operator=( ThisType& ptr )
       {
          this->free();
@@ -375,31 +403,31 @@ class UniquePointer< Object, Devices::MIC > : public SmartPointer
          ptr.mic_pointer = nullptr;
          return *this;
       }
-      
+
       const ThisType& operator=( ThisType&& ptr )
       {
          return this->operator=( ptr );
-      }      
-      
+      }
+
       bool synchronize()
       {
          if( ! this->pd )
             return true;
          if( this->modified() )
-         { 
-            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));  
+         {
+            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
             this->set_last_sync_state();
             return true;
          }
          return true;//??
       }
-            
+
       ~UniquePointer()
       {
          this->free();
          Devices::MIC::removeSmartPointer( this );
       }
-      
+
    protected:
 
       struct PointerData
@@ -434,14 +462,14 @@ class UniquePointer< Object, Devices::MIC > : public SmartPointer
 
       void set_last_sync_state()
       {
-         TNL_ASSERT( this->pd, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) );
          this->pd->maybe_modified = false;
       }
 
       bool modified()
       {
-         TNL_ASSERT( this->pd, );
+         TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" );
          // optimization: skip bitwise comparison if we're sure that the data is the same
          if( ! this->pd->maybe_modified )
             return false;
@@ -455,23 +483,25 @@ class UniquePointer< Object, Devices::MIC > : public SmartPointer
          if( this->mic_pointer )
              Devices::MIC::FreeMIC(mic_pointer);
       }
-      
+
       PointerData* pd;
 
-      // cuda_pointer can't be part of PointerData structure, since we would be
+      // mic_pointer can't be part of PointerData structure, since we would be
       // unable to dereference this-pd on the device
       Object* mic_pointer;
 };
 #endif
 
-#if  (!defined(NDEBUG)) && (!defined(HAVE_MIC)) 
+} // namespace Pointers
+
+#if (!defined(NDEBUG)) && (!defined(HAVE_MIC))
 namespace Assert {
 
 template< typename Object, typename Device >
-struct Formatter< UniquePointer< Object, Device > >
+struct Formatter< Pointers::UniquePointer< Object, Device > >
 {
    static std::string
-   printToString( const UniquePointer< Object, Device >& value )
+   printToString( const Pointers::UniquePointer< Object, Device >& value )
    {
       ::std::stringstream ss;
       ss << "(UniquePointer< " << Object::getType() << ", " << Device::getDeviceType()
@@ -484,4 +514,3 @@ struct Formatter< UniquePointer< Object, Device > >
 #endif
 
 } // namespace TNL
-
diff --git a/src/TNL/Problems/HeatEquationEocProblem_impl.h b/src/TNL/Problems/HeatEquationEocProblem_impl.h
index 380f4bf0875fd61ea4e0e5196d111c748d841410..ae062df74ec825f124961b5ff33f2223c32b7d54 100644
--- a/src/TNL/Problems/HeatEquationEocProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationEocProblem_impl.h
@@ -47,6 +47,12 @@ setup( const Config::ParameterContainer& parameters,
    if( ! this->boundaryConditionPointer->setup( this->getMesh(), parameters, prefix ) ||
        ! this->rightHandSidePointer->setup( parameters ) )
       return false;
+   this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer );
+   this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer );
+   this->explicitUpdater.setRightHandSide( this->rightHandSidePointer );
+   this->systemAssembler.setDifferentialOperator( this->differentialOperatorPointer );
+   this->systemAssembler.setBoundaryConditions( this->boundaryConditionPointer );
+   this->systemAssembler.setRightHandSide( this->rightHandSidePointer );   
    return true;
 }
 
diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h
index 32b06ea9f1e323ae552486b23ab94eb7c3202756..a04e6ea2a9bdd0a26e8f06d25b00ab7055126fd3 100644
--- a/src/TNL/Problems/HeatEquationProblem.h
+++ b/src/TNL/Problems/HeatEquationProblem.h
@@ -48,12 +48,12 @@ class HeatEquationProblem : public PDEProblem< Mesh,
       typedef typename Mesh::DeviceType DeviceType;
       typedef typename DifferentialOperator::IndexType IndexType;
       typedef Functions::MeshFunction< Mesh > MeshFunctionType;
-      typedef SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer< MeshFunctionType, DeviceType > MeshFunctionPointer;
       typedef PDEProblem< Mesh, Communicator, RealType, DeviceType, IndexType > BaseType;
       typedef Matrices::SlicedEllpack< RealType, DeviceType, IndexType > MatrixType;
-      typedef SharedPointer< DifferentialOperator > DifferentialOperatorPointer;
-      typedef SharedPointer< BoundaryCondition > BoundaryConditionPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer<  DifferentialOperator > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer<  BoundaryCondition > BoundaryConditionPointer;
+      typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
 
       using typename BaseType::MeshType;
       using typename BaseType::MeshPointer;
@@ -93,6 +93,9 @@ class HeatEquationProblem : public PDEProblem< Mesh,
                               const RealType& tau,
                               DofVectorPointer& _u,
                               DofVectorPointer& _fu );
+      
+      void applyBoundaryConditions( const RealType& time,
+                                    DofVectorPointer& dofs );      
 
       template< typename MatrixPointer >
       void assemblyLinearSystem( const RealType& time,
diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h
index b39438b85e8cae705e55601f09800e120c97203c..03be60b3b1a39922ba7bd24b8e92264b3fb2dd5a 100644
--- a/src/TNL/Problems/HeatEquationProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationProblem_impl.h
@@ -101,6 +101,12 @@ setup( const Config::ParameterContainer& parameters,
    if(param=="LocalCopy")
         distributedIOType=Meshes::DistributedMeshes::LocalCopy;
 
+   this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer );
+   this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer );
+   this->explicitUpdater.setRightHandSide( this->rightHandSidePointer );
+   this->systemAssembler.setDifferentialOperator( this->differentialOperatorPointer );
+   this->systemAssembler.setBoundaryConditions( this->boundaryConditionPointer );
+   this->systemAssembler.setRightHandSide( this->rightHandSidePointer );
    return true;
 }
 
@@ -177,7 +183,7 @@ setupLinearSystem( MatrixPointer& matrixPointer )
 {
    const IndexType dofs = this->getDofs();
    typedef typename MatrixPointer::ObjectType::CompressedRowLengthsVector CompressedRowLengthsVectorType;
-   SharedPointer< CompressedRowLengthsVectorType > rowLengthsPointer;
+   Pointers::SharedPointer<  CompressedRowLengthsVectorType > rowLengthsPointer;
    rowLengthsPointer->setSize( dofs );
    Matrices::MatrixSetter< MeshType, DifferentialOperator, BoundaryCondition, CompressedRowLengthsVectorType > matrixSetter;
    matrixSetter.template getCompressedRowLengths< typename Mesh::Cell >(
@@ -248,11 +254,22 @@ getExplicitUpdate( const RealType& time,
     */
    
    this->bindDofs( uDofs );
-   MeshFunctionPointer fuPointer( this->getMesh(), fuDofs );
-   this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer ),
-   this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer ),
-   this->explicitUpdater.setRightHandSide( this->rightHandSidePointer ),
-   this->explicitUpdater.template update< typename Mesh::Cell, Communicator >( time, tau, this->getMesh(), this->uPointer, fuPointer );
+   this->fuPointer->bind( this->getMesh(), *fuDofs );
+   this->explicitUpdater.template update< typename Mesh::Cell, Communicator >( time, tau, this->getMesh(), this->uPointer, this->fuPointer );
+}
+
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename Communicator,
+          typename DifferentialOperator >
+void 
+HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >::
+applyBoundaryConditions( const RealType& time,
+                         DofVectorPointer& uDofs )
+{
+   this->bindDofs( uDofs );
+   this->explicitUpdater.template applyBoundaryConditions< typename Mesh::Cell >( this->getMesh(), time, this->uPointer );
 }
 
 template< typename Mesh,
@@ -270,9 +287,6 @@ assemblyLinearSystem( const RealType& time,
                       DofVectorPointer& bPointer )
 {
    this->bindDofs( dofsPointer );
-   this->systemAssembler.setDifferentialOperator( this->differentialOperatorPointer );
-   this->systemAssembler.setBoundaryConditions( this->boundaryConditionPointer );
-   this->systemAssembler.setRightHandSide( this->rightHandSidePointer );
    this->systemAssembler.template assembly< typename Mesh::Cell, typename MatrixPointer::ObjectType >( 
       time,
       tau,
diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h
index 4848debd16516b51bf42871787089a478272b39b..51e56d44aa47e35689a46543e37d3c23cc9f2a7b 100644
--- a/src/TNL/Problems/PDEProblem.h
+++ b/src/TNL/Problems/PDEProblem.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Problems/Problem.h>
 #include <TNL/Problems/CommonData.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Matrices/SlicedEllpack.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
 
@@ -34,15 +34,15 @@ class PDEProblem : public Problem< Real, Device, Index >
       using typename BaseType::IndexType;
 
       using MeshType = Mesh;
-      using MeshPointer = SharedPointer< MeshType, DeviceType >;
+      using MeshPointer = Pointers::SharedPointer< MeshType, DeviceType >;
       using DistributedMeshType = Meshes::DistributedMeshes::DistributedMesh< MeshType >;
       using SubdomainOverlapsType = typename DistributedMeshType::SubdomainOverlapsType;
       using DofVectorType = Containers::Vector< RealType, DeviceType, IndexType>;
-      using DofVectorPointer = SharedPointer< DofVectorType, DeviceType >;
+      using DofVectorPointer = Pointers::SharedPointer< DofVectorType, DeviceType >;
       using MatrixType = Matrices::SlicedEllpack< RealType, DeviceType, IndexType >;
       using CommunicatorType = Communicator;
       using CommonDataType = CommonData;
-      using CommonDataPointer = SharedPointer< CommonDataType, DeviceType >;
+      using CommonDataPointer = Pointers::SharedPointer< CommonDataType, DeviceType >;
 
       static constexpr bool isTimeDependent() { return true; };
       
@@ -87,8 +87,8 @@ class PDEProblem : public Problem< Real, Device, Index >
                        const RealType& tau,
                        DofVectorPointer& dofs );
  
-      void setExplicitBoundaryConditions( const RealType& time,
-                                          DofVectorPointer& dofs );
+      void applyBoundaryConditions( const RealType& time,
+                                       DofVectorPointer& dofs );
 
       template< typename Matrix >
       void saveFailedLinearSystem( const Matrix& matrix,
diff --git a/src/TNL/Problems/PDEProblem_impl.h b/src/TNL/Problems/PDEProblem_impl.h
index 081aeaf424d771207c79b0913022f5b662fa5177..1e5260527132b01e67993a62e177e3588d57dda2 100644
--- a/src/TNL/Problems/PDEProblem_impl.h
+++ b/src/TNL/Problems/PDEProblem_impl.h
@@ -180,18 +180,6 @@ preIterate( const RealType& time,
    return true;
 }
 
-template< typename Mesh,
-          typename Communicator,
-          typename Real,
-          typename Device,
-          typename Index >
-void
-PDEProblem< Mesh, Communicator, Real, Device, Index >::
-setExplicitBoundaryConditions( const RealType& time,
-                               DofVectorPointer& dofs )
-{
-}
-
 template< typename Mesh,
           typename Communicator,
           typename Real,
diff --git a/src/TNL/SmartPointer.h b/src/TNL/SmartPointer.h
deleted file mode 100644
index 73293462783ab1875e72c543d96c0c3569b3505f..0000000000000000000000000000000000000000
--- a/src/TNL/SmartPointer.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-/***************************************************************************
-                          SmartPointer.h  -  description
-                             -------------------
-    begin                : May 30, 2016
-    copyright            : (C) 2016 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-#pragma once
-
-class SmartPointer
-{
-   public:
-      
-      virtual bool synchronize() = 0;      
-   
-};
-
diff --git a/src/TNL/Solvers/BuildConfigTags.h b/src/TNL/Solvers/BuildConfigTags.h
index 7824f4a18ed44b2cfbb17c1ded32179806bb6fa1..12a32bd977c479ee49887c5b9435362d04cb2aa3 100644
--- a/src/TNL/Solvers/BuildConfigTags.h
+++ b/src/TNL/Solvers/BuildConfigTags.h
@@ -12,18 +12,9 @@
 
 #include <TNL/Solvers/ODE/Merson.h>
 #include <TNL/Solvers/ODE/Euler.h>
-#include <TNL/Solvers/Linear/SOR.h>
-#include <TNL/Solvers/Linear/CG.h>
-#include <TNL/Solvers/Linear/BICGStab.h>
-#include <TNL/Solvers/Linear/BICGStabL.h>
-#include <TNL/Solvers/Linear/CWYGMRES.h>
-#include <TNL/Solvers/Linear/GMRES.h>
-#include <TNL/Solvers/Linear/TFQMR.h>
-#include <TNL/Solvers/Linear/UmfpackWrapper.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
 
 namespace TNL {
-namespace Solvers {   
+namespace Solvers {
 
 class DefaultBuildConfigTag {};
 
@@ -85,92 +76,5 @@ public:
 
 template< typename ConfigTag, typename ExplicitSolver > struct ConfigTagExplicitSolver{ enum { enabled = true }; };
 
-/****
- * All semi-implicit solvers are enabled by default
- */
-class  SemiImplicitSORSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::SOR< Matrix, Preconditioner >;
-};
-
-class  SemiImplicitCGSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::CG< Matrix, Preconditioner >;
-};
-
-class  SemiImplicitBICGStabSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::BICGStab< Matrix, Preconditioner >;
-};
-
-class  SemiImplicitBICGStabLSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::BICGStabL< Matrix, Preconditioner >;
-};
-
-class  SemiImplicitCWYGMRESSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::CWYGMRES< Matrix, Preconditioner >;
-};
-
-class  SemiImplicitGMRESSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::GMRES< Matrix, Preconditioner >;
-};
-
-class  SemiImplicitTFQMRSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::TFQMR< Matrix, Preconditioner >;
-};
-
-#ifdef HAVE_UMFPACK
-class  SemiImplicitUmfpackSolverTag
-{
-public:
-    template< typename Matrix,
-              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
-                                                                        typename Matrix::DeviceType,
-                                                                        typename Matrix::IndexType > >
-    using Template = Linear::UmfpackWrapper< Matrix, Preconditioner >;
-};
-#endif
-
-template< typename ConfigTag, typename SemiImplicitSolver > struct ConfigTagSemiImplicitSolver{ enum { enabled = true }; };
-
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/CMakeLists.txt b/src/TNL/Solvers/CMakeLists.txt
index e648956cdb6f9535e77da8ae1e2a3f0e8b6a6818..e1df958fb903f52c9c8f03991a0fe08feb8af1f3 100644
--- a/src/TNL/Solvers/CMakeLists.txt
+++ b/src/TNL/Solvers/CMakeLists.txt
@@ -2,11 +2,11 @@ ADD_SUBDIRECTORY( Linear )
 ADD_SUBDIRECTORY( ODE )
 ADD_SUBDIRECTORY( PDE )
 
-
 SET( headers IterativeSolver.h
              IterativeSolver_impl.h
              BuildConfigTags.h
              FastBuildConfigTag.h
+             LinearSolverTypeResolver.h
              Solver.h
              Solver_impl.h
              SolverStarter.h
@@ -20,23 +20,4 @@ SET( headers IterativeSolver.h
              IterativeSolverMonitor.h
              IterativeSolverMonitor_impl.h )
 
-SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Solvers )
-set( common_SOURCES ${CURRENT_DIR}/IterativeSolver_impl.cpp )
-
-if( BUILD_CUDA)
-      set( tnl_solvers_CUDA__SOURCES
-        ${tnl_solvers_linear_CUDA__SOURCES}
-        ${tnl_solvers_ode_CUDA__SOURCES}
-        ${tnl_solvers_pde_CUDA__SOURCES}
-        ${common_SOURCES}
-        PARENT_SCOPE )
-endif()
-
-set( tnl_solvers_SOURCES
-     ${tnl_solvers_linear_SOURCES}
-     ${tnl_solvers_ode_SOURCES}
-     ${tnl_solvers_pde_SOURCES}
-     ${common_SOURCES}
-     PARENT_SCOPE )
-   
 INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Solvers )
diff --git a/src/TNL/Solvers/DummyProblem.h b/src/TNL/Solvers/DummyProblem.h
index f1f78d7b52b300268af4ddaaeed024db40800873..a0029cfea03555baed36e2da2f2e81e25d4b42c7 100644
--- a/src/TNL/Solvers/DummyProblem.h
+++ b/src/TNL/Solvers/DummyProblem.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
@@ -33,7 +33,7 @@ class DummyProblem
       typedef Containers::Vector< Real, Device, Index > DofVectorType;
       typedef Meshes::Grid< 1, Real, Device, Index > MeshType;
       using CommonDataType = Problems::CommonData;
-      using CommonDataPointer = SharedPointer< CommonDataType, Device >;
+      using CommonDataPointer = Pointers::SharedPointer< CommonDataType, Device >;
       using CommunicatorType = Communicators::NoDistrCommunicator;
       
       static constexpr bool isTimeDependent(){ return true; };      
diff --git a/src/TNL/Solvers/IterativeSolver_impl.cpp b/src/TNL/Solvers/IterativeSolver_impl.cpp
deleted file mode 100644
index caf09d226b9a592872b0d41d6c4da9204d8bc211..0000000000000000000000000000000000000000
--- a/src/TNL/Solvers/IterativeSolver_impl.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-/***************************************************************************
-                          IterativeSolver_impl.cpp  -  description
-                             -------------------
-    begin                : Mar 17, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Solvers/IterativeSolver.h>
-
-namespace TNL {
-namespace Solvers {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template class IterativeSolver< float,  int >;
-template class IterativeSolver< double, int >;
-template class IterativeSolver< float,  long int >;
-template class IterativeSolver< double, long int >;
-
-#ifdef HAVE_CUDA
-template class IterativeSolver< float,  int >;
-template class IterativeSolver< double, int >;
-template class IterativeSolver< float,  long int >;
-template class IterativeSolver< double, long int >;
-#endif
-
-#endif
-} // namespace Solvers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Solvers/IterativeSolver_impl.h b/src/TNL/Solvers/IterativeSolver_impl.h
index 8ca9f4eed110507dc2db783187419b6281d0da8e..e7430bb1051167ae2ce260a8179350a16b3fcbe4 100644
--- a/src/TNL/Solvers/IterativeSolver_impl.h
+++ b/src/TNL/Solvers/IterativeSolver_impl.h
@@ -224,22 +224,5 @@ void IterativeSolver< Real, Index> :: refreshSolverMonitor( bool force )
    }
 }
 
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-extern template class IterativeSolver< float,  int >;
-extern template class IterativeSolver< double, int >;
-extern template class IterativeSolver< float,  long int >;
-extern template class IterativeSolver< double, long int >;
-
-#ifdef HAVE_CUDA
-extern template class IterativeSolver< float,  int >;
-extern template class IterativeSolver< double, int >;
-extern template class IterativeSolver< float,  long int >;
-extern template class IterativeSolver< double, long int >;
-#endif
-
-#endif
-
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/Linear/BICGStab.h b/src/TNL/Solvers/Linear/BICGStab.h
index 02d17965e890123968bf6059082cba0164e16f71..03b5f70b74bee7d744be5219c0b6481e2e8b1576 100644
--- a/src/TNL/Solvers/Linear/BICGStab.h
+++ b/src/TNL/Solvers/Linear/BICGStab.h
@@ -10,38 +10,25 @@
 
 #pragma once
 
-#include <math.h>
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
+#include "LinearSolver.h"
+
 #include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
-
-class BICGStab : public Object,
-                 public IterativeSolver< typename Matrix :: RealType,
-                                         typename Matrix :: IndexType >
+template< typename Matrix >
+class BICGStab
+: public LinearSolver< Matrix >
 {
-   public:
-
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
-   BICGStab();
+   using Base = LinearSolver< Matrix >;
+public:
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const;
 
@@ -49,26 +36,16 @@ class BICGStab : public Object,
                             const String& prefix = "" );
 
    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
+               const String& prefix = "" ) override;
 
-   void setMatrix( const MatrixPointer& matrix );
-
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
-   bool solve( const Vector& b, Vector& x );
-
-   protected:
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
+protected:
    void setSize( IndexType size );
 
-   bool exact_residue;
+   bool exact_residue = false;
 
    Containers::Vector< RealType, DeviceType, IndexType > r, r_ast, p, s, Ap, As, M_tmp;
-
-   MatrixPointer matrix;
-   PreconditionerPointer preconditioner;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/BICGStabL.h b/src/TNL/Solvers/Linear/BICGStabL.h
index 124f70839950e5565a5fb9c0a6931c08abce4509..77311c442c65de86c41a65ba786fb75b870932eb 100644
--- a/src/TNL/Solvers/Linear/BICGStabL.h
+++ b/src/TNL/Solvers/Linear/BICGStabL.h
@@ -41,40 +41,25 @@
 
 #pragma once
 
-#include <math.h>
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
+#include "LinearSolver.h"
+
 #include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
-
+template< typename Matrix >
 class BICGStabL
-   : public Object,
-     public IterativeSolver< typename Matrix :: RealType,
-                             typename Matrix :: IndexType >
+: public LinearSolver< Matrix >
 {
+   using Base = LinearSolver< Matrix >;
 public:
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-   typedef Containers::Vector< RealType, DeviceType, IndexType > DeviceVector;
-   typedef Containers::Vector< RealType, Devices::Host, IndexType > HostVector;
-
-   BICGStabL();
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const;
 
@@ -82,17 +67,14 @@ public:
                             const String& prefix = "" );
 
    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
-
-   void setMatrix( const MatrixPointer& matrix );
+               const String& prefix = "" ) override;
 
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
-   bool solve( const Vector& b, Vector& x );
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
 protected:
+   using DeviceVector = Containers::Vector< RealType, DeviceType, IndexType >;
+   using HostVector = Containers::Vector< RealType, Devices::Host, IndexType >;
+
    void setSize( IndexType size );
 
    int ell = 1;
@@ -107,9 +89,6 @@ protected:
    HostVector T, sigma, g_0, g_1, g_2;
 
    IndexType size, ldSize;
-
-   MatrixPointer matrix;
-   PreconditionerPointer preconditioner;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/BICGStabL_impl.h b/src/TNL/Solvers/Linear/BICGStabL_impl.h
index f448e7c8835c49dd72a31e4b8e689dd09d825f85..6606bddd561fa8a8d5b3ce5e8855b6e41541b546 100644
--- a/src/TNL/Solvers/Linear/BICGStabL_impl.h
+++ b/src/TNL/Solvers/Linear/BICGStabL_impl.h
@@ -20,74 +20,41 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner >
-BICGStabL< Matrix, Preconditioner >::BICGStabL()
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();
-}
-
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 String
-BICGStabL< Matrix, Preconditioner >::getType() const
+BICGStabL< Matrix >::getType() const
 {
    return String( "BICGStabL< " ) +
           this->matrix -> getType() + ", " +
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-BICGStabL< Matrix, Preconditioner >::
+BICGStabL< Matrix >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
    config.addEntry< int >( prefix + "bicgstab-ell", "Number of Bi-CG iterations before the MR part starts.", 1 );
    config.addEntry< bool >( prefix + "bicgstab-exact-residue", "Whether the BiCGstab should compute the exact residue in each step (true) or to use a cheap approximation (false).", false );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 bool
-BICGStabL< Matrix, Preconditioner >::
+BICGStabL< Matrix >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
    ell = parameters.getParameter< int >( "bicgstab-ell" );
    exact_residue = parameters.getParameter< bool >( "bicgstab-exact-residue" );
-   return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void
-BICGStabL< Matrix, Preconditioner >::setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void
-BICGStabL< Matrix, Preconditioner >::setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
+   return LinearSolver< Matrix >::setup( parameters, prefix );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
+template< typename Matrix >
 bool
-BICGStabL< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
+BICGStabL< Matrix >::solve( ConstVectorViewType b, VectorViewType x )
 {
-   this->setSize( matrix->getRows() );
+   this->setSize( this->matrix->getRows() );
 
    RealType alpha, beta, gamma, rho_0, rho_1, omega, b_norm;
    DeviceVector r_0, r_j, r_i, u_0, Au, u;
@@ -96,26 +63,26 @@ BICGStabL< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
 
    auto matvec = [this]( const DeviceVector& src, DeviceVector& dst )
    {
-      if( preconditioner ) {
-         matrix->vectorProduct( src, M_tmp );
-         preconditioner->solve( M_tmp, dst );
+      if( this->preconditioner ) {
+         this->matrix->vectorProduct( src, M_tmp );
+         this->preconditioner->solve( M_tmp, dst );
       }
       else {
-         matrix->vectorProduct( src, dst );
+         this->matrix->vectorProduct( src, dst );
       }
    };
 
-   if( preconditioner ) {
-      preconditioner->solve( b, M_tmp );
+   if( this->preconditioner ) {
+      this->preconditioner->solve( b, M_tmp );
       b_norm = M_tmp.lpNorm( ( RealType ) 2.0 );
 
-      matrix->vectorProduct( x, M_tmp );
+      this->matrix->vectorProduct( x, M_tmp );
       M_tmp.addVector( b, 1.0, -1.0 );
-      preconditioner->solve( M_tmp, r_0 );
+      this->preconditioner->solve( M_tmp, r_0 );
    }
    else {
       b_norm = b.lpNorm( 2.0 );
-      matrix->vectorProduct( x, r_0 );
+      this->matrix->vectorProduct( x, r_0 );
       r_0.addVector( b, 1.0, -1.0 );
    }
 
@@ -274,13 +241,13 @@ BICGStabL< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
          /****
           * Compute the exact preconditioned residue into the 's' vector.
           */
-         if( preconditioner ) {
-            matrix->vectorProduct( x, M_tmp );
+         if( this->preconditioner ) {
+            this->matrix->vectorProduct( x, M_tmp );
             M_tmp.addVector( b, 1.0, -1.0 );
-            preconditioner->solve( M_tmp, res_tmp );
+            this->preconditioner->solve( M_tmp, res_tmp );
          }
          else {
-            matrix->vectorProduct( x, res_tmp );
+            this->matrix->vectorProduct( x, res_tmp );
             res_tmp.addVector( b, 1.0, -1.0 );
          }
          sigma[ 0 ] = res_tmp.lpNorm( 2.0 );
@@ -299,10 +266,9 @@ BICGStabL< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
    return this->checkConvergence();
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-BICGStabL< Matrix, Preconditioner >::setSize( IndexType size )
+BICGStabL< Matrix >::setSize( IndexType size )
 {
    this->size = ldSize = size;
    R.setSize( (ell + 1) * ldSize );
diff --git a/src/TNL/Solvers/Linear/BICGStab_impl.h b/src/TNL/Solvers/Linear/BICGStab_impl.h
index adf744b50da033cd102b778189eacd68a22f9a38..86702b310365e9cda06ca4435122ce3f329d5982 100644
--- a/src/TNL/Solvers/Linear/BICGStab_impl.h
+++ b/src/TNL/Solvers/Linear/BICGStab_impl.h
@@ -10,89 +10,59 @@
 
 #pragma once
 
+#include <cmath>
+
 #include "BICGStab.h"
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner >
-BICGStab< Matrix, Preconditioner > :: BICGStab()
-: exact_residue( false )
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();   
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-String BICGStab< Matrix, Preconditioner > :: getType() const
+template< typename Matrix >
+String BICGStab< Matrix > :: getType() const
 {
    return String( "BICGStab< " ) +
           this->matrix -> getType() + ", " +
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-BICGStab< Matrix, Preconditioner >::
+BICGStab< Matrix >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
    config.addEntry< bool >( prefix + "bicgstab-exact-residue", "Whether the BiCGstab should compute the exact residue in each step (true) or to use a cheap approximation (false).", false );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 bool
-BICGStab< Matrix, Preconditioner >::
+BICGStab< Matrix >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
    exact_residue = parameters.getParameter< bool >( "bicgstab-exact-residue" );
-   return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void BICGStab< Matrix, Preconditioner >::setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void BICGStab< Matrix, Preconditioner > :: setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
+   return LinearSolver< Matrix >::setup( parameters, prefix );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
-bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
+template< typename Matrix >
+bool BICGStab< Matrix >::solve( ConstVectorViewType b, VectorViewType x )
 {
-   this->setSize( matrix->getRows() );
+   this->setSize( this->matrix->getRows() );
 
    RealType alpha, beta, omega, aux, rho, rho_old, b_norm;
 
-   if( preconditioner ) {
-      preconditioner->solve( b, M_tmp );
+   if( this->preconditioner ) {
+      this->preconditioner->solve( b, M_tmp );
       b_norm = M_tmp.lpNorm( ( RealType ) 2.0 );
 
-      matrix->vectorProduct( x, M_tmp );
+      this->matrix->vectorProduct( x, M_tmp );
       M_tmp.addVector( b, 1.0, -1.0 );
-      preconditioner->solve( M_tmp, r );
+      this->preconditioner->solve( M_tmp, r );
    }
    else {
       b_norm = b.lpNorm( 2.0 );
-      matrix->vectorProduct( x, r );
+      this->matrix->vectorProduct( x, r );
       r.addVector( b, 1.0, -1.0 );
    }
 
@@ -111,12 +81,12 @@ bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
       /****
        * alpha_j = ( r_j, r^ast_0 ) / ( A * p_j, r^ast_0 )
        */
-      if( preconditioner ) {
-         matrix->vectorProduct( p, M_tmp );
-         preconditioner->solve( M_tmp, Ap );
+      if( this->preconditioner ) {
+         this->matrix->vectorProduct( p, M_tmp );
+         this->preconditioner->solve( M_tmp, Ap );
       }
       else {
-         matrix->vectorProduct( p, Ap );
+         this->matrix->vectorProduct( p, Ap );
       }
       aux = Ap.scalarProduct( r_ast );
       alpha = rho / aux;
@@ -129,12 +99,12 @@ bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
       /****
        * omega_j = ( A s_j, s_j ) / ( A s_j, A s_j )
        */
-      if( preconditioner ) {
-         matrix->vectorProduct( s, M_tmp );
-         preconditioner->solve( M_tmp, As );
+      if( this->preconditioner ) {
+         this->matrix->vectorProduct( s, M_tmp );
+         this->preconditioner->solve( M_tmp, As );
       }
       else {
-         matrix->vectorProduct( s, As );
+         this->matrix->vectorProduct( s, As );
       }
       aux = As.lpNorm( 2.0 );
       omega = As.scalarProduct( s ) / ( aux * aux );
@@ -165,13 +135,13 @@ bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
          /****
           * Compute the exact preconditioned residue into the 's' vector.
           */
-         if( preconditioner ) {
-            matrix->vectorProduct( x, M_tmp );
+         if( this->preconditioner ) {
+            this->matrix->vectorProduct( x, M_tmp );
             M_tmp.addVector( b, 1.0, -1.0 );
-            preconditioner->solve( M_tmp, s );
+            this->preconditioner->solve( M_tmp, s );
          }
          else {
-            matrix->vectorProduct( x, s );
+            this->matrix->vectorProduct( x, s );
             s.addVector( b, 1.0, -1.0 );
          }
          const RealType residue = s.lpNorm( 2.0 );
@@ -190,9 +160,8 @@ bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
    return this->checkConvergence();
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void BICGStab< Matrix, Preconditioner > :: setSize( IndexType size )
+template< typename Matrix >
+void BICGStab< Matrix > :: setSize( IndexType size )
 {
    r.setSize( size );
    r_ast.setSize( size );
diff --git a/src/TNL/Solvers/Linear/CG.h b/src/TNL/Solvers/Linear/CG.h
index 670303873d401e489ae55740b35905ab1914371b..146dd7947226f700257bc24e6a3985cf941094de 100644
--- a/src/TNL/Solvers/Linear/CG.h
+++ b/src/TNL/Solvers/Linear/CG.h
@@ -10,63 +10,34 @@
 
 #pragma once
 
-#include <math.h>
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
+#include "LinearSolver.h"
+
 #include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
-class CG : public Object,
-           public IterativeSolver< typename Matrix :: RealType,
-                                   typename Matrix :: IndexType >
+template< typename Matrix >
+class CG
+: public LinearSolver< Matrix >
 {
-   public:
-
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
+   using Base = LinearSolver< Matrix >;
+public:
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-   CG();
- 
    String getType() const;
 
-   static void configSetup( Config::ConfigDescription& config,
-                            const String& prefix = "" );
-
-   bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
-
-   void setMatrix( const MatrixPointer& matrix );
-
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
-   bool solve( const Vector& b, Vector& x );
-
-   protected:
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
+protected:
    void setSize( IndexType size );
 
    Containers::Vector< RealType, DeviceType, IndexType >  r, new_r, p, Ap;
-
-   MatrixPointer matrix;
-   PreconditionerPointer preconditioner;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/CG_impl.h b/src/TNL/Solvers/Linear/CG_impl.h
index 4c11c224e5c0b4f73c45a82146862473b9fc6488..8889451f2d32809a7dcf2a1dcb245b8ecfb25cf9 100644
--- a/src/TNL/Solvers/Linear/CG_impl.h
+++ b/src/TNL/Solvers/Linear/CG_impl.h
@@ -12,72 +12,26 @@
 
 #include "CG.h"
 
+#include <TNL/Solvers/Linear/LinearResidueGetter.h>
+
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner >
-CG< Matrix, Preconditioner > :: CG()
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();   
-}
-
-template< typename Matrix,
-           typename Preconditioner >
-String CG< Matrix, Preconditioner > :: getType() const
+template< typename Matrix >
+String CG< Matrix > :: getType() const
 {
    return String( "CG< " ) +
           this->matrix -> getType() + ", " +
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void
-CG< Matrix, Preconditioner >::
-configSetup( Config::ConfigDescription& config,
-             const String& prefix )
-{
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-bool
-CG< Matrix, Preconditioner >::
-setup( const Config::ParameterContainer& parameters,
-       const String& prefix )
-{
-   return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void CG< Matrix, Preconditioner >::setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void CG< Matrix, Preconditioner > :: setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
+template< typename Matrix >
 bool
-CG< Matrix, Preconditioner >::
-solve( const Vector& b, Vector& x )
+CG< Matrix >::
+solve( ConstVectorViewType b, VectorViewType x )
 {
-   this->setSize( matrix->getRows() );
+   this->setSize( this->matrix->getRows() );
 
    this->resetIterations();
    this->setResidue( this->getConvergenceResidue() + 1.0 );
@@ -107,12 +61,12 @@ solve( const Vector& b, Vector& x )
        */
       if( s2 == 0.0 ) alpha = 0.0;
       else alpha = s1 / s2;
- 
+
       /****
        * 2. x_{j+1} = x_j + \alpha_j p_j
        */
       x.addVector( p, alpha );
-      
+
       /****
        * 3. r_{j+1} = r_j - \alpha_j A * p_j
        */
@@ -140,18 +94,17 @@ solve( const Vector& b, Vector& x )
        * 6. r_{j+1} = new_r
        */
       new_r.swap( r );
- 
+
       if( this->getIterations() % 10 == 0 )
-         this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+         this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
    }
-   this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+   this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
    this->refreshSolverMonitor( true );
    return this->checkConvergence();
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void CG< Matrix, Preconditioner > :: setSize( IndexType size )
+template< typename Matrix >
+void CG< Matrix > :: setSize( IndexType size )
 {
    r.setSize( size );
    new_r.setSize( size );
diff --git a/src/TNL/Solvers/Linear/CMakeLists.txt b/src/TNL/Solvers/Linear/CMakeLists.txt
index 3598f8dc6d9863fec4dec15a2d5cef8130e6cc99..f44f0a095ffc5f305b91dfb162e336b407095eae 100644
--- a/src/TNL/Solvers/Linear/CMakeLists.txt
+++ b/src/TNL/Solvers/Linear/CMakeLists.txt
@@ -13,6 +13,7 @@ SET( headers BICGStab.h
              Jacobi.h
              LinearResidueGetter.h
              LinearResidueGetter_impl.h
+             LinearSolver.h
              SOR.h
              SOR_impl.h
              TFQMR.h
diff --git a/src/TNL/Solvers/Linear/CWYGMRES.h b/src/TNL/Solvers/Linear/CWYGMRES.h
index fd7b4dbef3af1fe71202d6942403ba244a2d43d3..1cccc132dd1818d152afe960ecada4d359eecec1 100644
--- a/src/TNL/Solvers/Linear/CWYGMRES.h
+++ b/src/TNL/Solvers/Linear/CWYGMRES.h
@@ -12,41 +12,25 @@
 
 #pragma once
 
-#include <math.h>
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
+#include "LinearSolver.h"
+
 #include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix::RealType,
-                                                            typename Matrix::DeviceType,
-                                                            typename Matrix::IndexType> >
+template< typename Matrix >
 class CWYGMRES
-: public Object,
-  public IterativeSolver< typename Matrix::RealType,
-                          typename Matrix::IndexType >
+: public LinearSolver< Matrix >
 {
+   using Base = LinearSolver< Matrix >;
 public:
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-   typedef Containers::Vector< RealType, DeviceType, IndexType > DeviceVector;
-   typedef Containers::Vector< RealType, Devices::Host, IndexType > HostVector;
-
-   CWYGMRES();
-
-   ~CWYGMRES();
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const;
 
@@ -54,19 +38,16 @@ public:
                             const String& prefix = "" );
 
    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
+               const String& prefix = "" ) override;
 
    void setRestarting( IndexType rest );
 
-   void setMatrix( const MatrixPointer& matrix );
-
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
-   bool solve( const Vector& b, Vector& x );
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
 protected:
+   using DeviceVector = Containers::Vector< RealType, DeviceType, IndexType >;
+   using HostVector = Containers::Vector< RealType, Devices::Host, IndexType >;
+
    void hauseholder_generate( DeviceVector& Y,
                               HostVector& T,
                               const int& i,
@@ -119,10 +100,12 @@ protected:
    // host-only storage for Givens rotations and the least squares problem
    HostVector cs, sn, H, s;
 
-   IndexType size, ldSize, restarting_min, restarting_max, restarting_step_min, restarting_step_max;
-
-   MatrixPointer matrix;
-   PreconditionerPointer preconditioner;
+   IndexType size = 0;
+   IndexType ldSize = 0;
+   IndexType restarting_min = 10;
+   IndexType restarting_max = 10;
+   IndexType restarting_step_min = 3;
+   IndexType restarting_step_max = 3;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/CWYGMRES_impl.h b/src/TNL/Solvers/Linear/CWYGMRES_impl.h
index 4563411b5e860f88cefaa1d14b96f1b4ac247855..1f7d06c7221e1bb7588e817bf7940d7e099fbbe3 100644
--- a/src/TNL/Solvers/Linear/CWYGMRES_impl.h
+++ b/src/TNL/Solvers/Linear/CWYGMRES_impl.h
@@ -13,6 +13,7 @@
 #pragma once
 
 #include <type_traits>
+#include <cmath>
 
 #include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Containers/Algorithms/Multireduction.h>
@@ -24,35 +25,9 @@ namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner >
-CWYGMRES< Matrix, Preconditioner >::
-CWYGMRES()
-: size( 0 ),
-  ldSize( 0 ),
-  restarting_min( 10 ),
-  restarting_max( 10 ),
-  restarting_step_min( 3 ),
-  restarting_step_max( 3 )
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();   
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-CWYGMRES< Matrix, Preconditioner >::
-~CWYGMRES()
-{
-}
-
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 String
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 getType() const
 {
    return String( "CWYGMRES< " ) +
@@ -60,39 +35,34 @@ getType() const
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
    config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 );
    config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 );
    config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
    config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 bool
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
-   IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
    restarting_min = parameters.getParameter< int >( "gmres-restarting-min" );
    this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) );
    restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" );
    restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" );
-   return true;
+   return LinearSolver< Matrix >::setup( parameters, prefix );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 setRestarting( IndexType rest )
 {
    if( size != 0 )
@@ -100,32 +70,12 @@ setRestarting( IndexType rest )
    restarting_max = rest;
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void
-CWYGMRES< Matrix, Preconditioner >::
-setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void
-CWYGMRES< Matrix, Preconditioner >::
-setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
+template< typename Matrix >
 bool
-CWYGMRES< Matrix, Preconditioner >::
-solve( const Vector& b, Vector& x )
+CWYGMRES< Matrix >::
+solve( ConstVectorViewType b, VectorViewType x )
 {
-   TNL_ASSERT_TRUE( matrix, "No matrix was set in CWYGMRES. Call setMatrix() before solve()." );
+   TNL_ASSERT_TRUE( this->matrix, "No matrix was set in CWYGMRES. Call setMatrix() before solve()." );
    if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max )
    {
       std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min
@@ -138,25 +88,25 @@ solve( const Vector& b, Vector& x )
                 << ", d_max = " << restarting_step_max << std::endl;
       return false;
    }
-   setSize( matrix -> getRows(), restarting_max );
+   setSize( this->matrix->getRows(), restarting_max );
 
    RealType normb( 0.0 ), beta( 0.0 );
    /****
     * 1. Solve r from M r = b - A x_0
     */
-   if( preconditioner )
+   if( this->preconditioner )
    {
       this->preconditioner->solve( b, _M_tmp );
       normb = _M_tmp.lpNorm( ( RealType ) 2.0 );
 
-      matrix->vectorProduct( x, _M_tmp );
+      this->matrix->vectorProduct( x, _M_tmp );
       _M_tmp.addVector( b, ( RealType ) 1.0, -1.0 );
 
       this->preconditioner->solve( _M_tmp, r );
    }
    else
    {
-      matrix->vectorProduct( x, r );
+      this->matrix->vectorProduct( x, r );
       normb = b.lpNorm( ( RealType ) 2.0 );
       r.addVector( b, ( RealType ) 1.0, -1.0 );
    }
@@ -255,13 +205,13 @@ solve( const Vector& b, Vector& x )
             /****
              * Solve w from M w = A v_i
              */
-            if( preconditioner )
+            if( this->preconditioner )
             {
-               matrix->vectorProduct( vi, _M_tmp );
+               this->matrix->vectorProduct( vi, _M_tmp );
                this->preconditioner->solve( _M_tmp, w );
             }
             else
-                matrix -> vectorProduct( vi, w );
+                this->matrix->vectorProduct( vi, w );
 
             /****
              * Apply all previous Hauseholder transformations, using the compact WY representation:
@@ -335,15 +285,15 @@ solve( const Vector& b, Vector& x )
        * r = M.solve(b - A * x);
        */
       const RealType beta_old = beta;
-      if( preconditioner )
+      if( this->preconditioner )
       {
-         matrix->vectorProduct( x, _M_tmp );
+         this->matrix->vectorProduct( x, _M_tmp );
          _M_tmp.addVector( b, ( RealType ) 1.0, -1.0 );
-         preconditioner->solve( _M_tmp, r );
+         this->preconditioner->solve( _M_tmp, r );
       }
       else
       {
-         matrix->vectorProduct( x, r );
+         this->matrix->vectorProduct( x, r );
          r.addVector( b, ( RealType ) 1.0, -1.0 );
       }
       beta = r.lpNorm( ( RealType ) 2.0 );
@@ -387,10 +337,9 @@ copyTruncatedVectorKernel( DestinationElement* destination,
 }
 #endif
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 hauseholder_generate( DeviceVector& Y,
                       HostVector& T,
                       const int& i,
@@ -471,10 +420,9 @@ hauseholder_generate( DeviceVector& Y,
    }
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 hauseholder_apply_trunc( HostVector& out,
                          DeviceVector& Y,
                          HostVector& T,
@@ -505,10 +453,9 @@ hauseholder_apply_trunc( HostVector& out,
    }
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 hauseholder_cwy( DeviceVector& v,
                  DeviceVector& Y,
                  HostVector& T,
@@ -542,10 +489,9 @@ hauseholder_cwy( DeviceVector& v,
    v.setElement( i, 1.0 + v.getElement( i ) );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 hauseholder_cwy_transposed( DeviceVector& z,
                             DeviceVector& Y,
                             HostVector& T,
@@ -584,11 +530,10 @@ hauseholder_cwy_transposed( DeviceVector& z,
                                          1.0, z.getData() );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
    template< typename Vector >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 update( IndexType k,
         IndexType m,
         const HostVector& H,
@@ -628,10 +573,9 @@ update( IndexType k,
                                          1.0, x.getData() );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-CWYGMRES< Matrix, Preconditioner >::
+CWYGMRES< Matrix >::
 generatePlaneRotation( RealType& dx,
                        RealType& dy,
                        RealType& cs,
@@ -657,9 +601,8 @@ generatePlaneRotation( RealType& dx,
       }
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void CWYGMRES< Matrix, Preconditioner > ::
+template< typename Matrix >
+void CWYGMRES< Matrix > ::
 applyPlaneRotation( RealType& dx,
                     RealType& dy,
                     RealType& cs,
@@ -670,9 +613,8 @@ applyPlaneRotation( RealType& dx,
    dx = temp;
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m )
+template< typename Matrix >
+void CWYGMRES< Matrix > :: setSize( IndexType _size, IndexType m )
 {
    if( size == _size && restarting_max == m )
       return;
diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h
index cedeef83c626663f9aac0b376c01735f3758f3c1..05c171f622a34e2715105c29882821cd8d46368b 100644
--- a/src/TNL/Solvers/Linear/GMRES.h
+++ b/src/TNL/Solvers/Linear/GMRES.h
@@ -10,37 +10,25 @@
 
 #pragma once
 
-#include <math.h>
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
+#include "LinearSolver.h"
+
 #include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
+template< typename Matrix >
 class GMRES
-: public Object,
-  public IterativeSolver< typename Matrix :: RealType,
-                          typename Matrix :: IndexType >
+: public LinearSolver< Matrix >
 {
+   using Base = LinearSolver< Matrix >;
 public:
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
-   GMRES();
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const;
 
@@ -48,19 +36,11 @@ public:
                             const String& prefix = "" );
 
    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
+               const String& prefix = "" ) override;
 
    void setRestarting( IndexType rest );
 
-   void setMatrix( const MatrixPointer& matrix );
-
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector > >
-   bool solve( const Vector& b, Vector& x );
-
-   ~GMRES();
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
 protected:
    template< typename VectorT >
@@ -87,11 +67,11 @@ protected:
    Containers::Vector< RealType, DeviceType, IndexType > _r, w, _v, _M_tmp;
    Containers::Vector< RealType, Devices::Host, IndexType > _s, _cs, _sn, _H;
 
-   IndexType size, restarting_min, restarting_max, restarting_step_min, restarting_step_max;
-
-   MatrixPointer matrix;
-   
-   PreconditionerPointer preconditioner;
+   IndexType size = 0;
+   IndexType restarting_min = 10;
+   IndexType restarting_max = 10;
+   IndexType restarting_step_min = 3;
+   IndexType restarting_step_max = 3;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h
index d3a20175926d9023228152f91a24ac04154fc80c..220cecf23e792ddff0fcfd30b8f3d14f560d8f78 100644
--- a/src/TNL/Solvers/Linear/GMRES_impl.h
+++ b/src/TNL/Solvers/Linear/GMRES_impl.h
@@ -10,40 +10,17 @@
 
 #pragma once
 
+#include <cmath>
+
 #include "GMRES.h"
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-           typename Preconditioner >
-GMRES< Matrix, Preconditioner >::
-GMRES()
-: size( 0 ),
-  restarting_min( 10 ),
-  restarting_max( 10 ),
-  restarting_step_min( 3 ),
-  restarting_step_max( 3 )
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-GMRES< Matrix, Preconditioner >::
-~GMRES()
-{
-}
-
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 String
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 getType() const
 {
    return String( "GMRES< " ) +
@@ -51,39 +28,34 @@ getType() const
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
    config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 );
    config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 );
    config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
    config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 bool
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
-   IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
    restarting_min = parameters.getParameter< int >( "gmres-restarting-min" );
    this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) );
    restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" );
    restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" );
-   return true;
+   return LinearSolver< Matrix >::setup( parameters, prefix );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 setRestarting( IndexType rest )
 {
    if( size != 0 )
@@ -91,32 +63,12 @@ setRestarting( IndexType rest )
    restarting_max = rest;
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void
-GMRES< Matrix, Preconditioner >::
-setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void
-GMRES< Matrix, Preconditioner >::
-setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
+template< typename Matrix >
 bool
-GMRES< Matrix, Preconditioner >::
-solve( const Vector& b, Vector& x )
+GMRES< Matrix >::
+solve( ConstVectorViewType b, VectorViewType x )
 {
-   TNL_ASSERT_TRUE( matrix, "No matrix was set in GMRES. Call setMatrix() before solve()." );
+   TNL_ASSERT_TRUE( this->matrix, "No matrix was set in GMRES. Call setMatrix() before solve()." );
    if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max )
    {
       std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min
@@ -129,7 +81,7 @@ solve( const Vector& b, Vector& x )
                 << ", d_max = " << restarting_step_max << std::endl;
       return false;
    }
-   setSize( matrix -> getRows(), restarting_max );
+   setSize( this->matrix->getRows(), restarting_max );
 
    IndexType _size = size;
  
@@ -145,19 +97,19 @@ solve( const Vector& b, Vector& x )
    /****
     * 1. Solve r from M r = b - A x_0
     */
-   if( preconditioner )
+   if( this->preconditioner )
    {
       this->preconditioner->solve( b, _M_tmp );
       normb = _M_tmp.lpNorm( ( RealType ) 2.0 );
 
-      matrix -> vectorProduct( x, _M_tmp );
+      this->matrix->vectorProduct( x, _M_tmp );
       _M_tmp.addVector( b, ( RealType ) 1.0, -1.0 );
 
       this->preconditioner->solve( _M_tmp, _r );
    }
    else
    {
-      matrix -> vectorProduct( x, _r );
+      this->matrix->vectorProduct( x, _r );
       normb = b.lpNorm( ( RealType ) 2.0 );
       _r.addVector( b, ( RealType ) 1.0, -1.0 );
    }
@@ -232,13 +184,13 @@ solve( const Vector& b, Vector& x )
          /****
           * Solve w from M w = A v_i
           */
-         if( preconditioner )
+         if( this->preconditioner )
          {
-            matrix->vectorProduct( vi, _M_tmp );
+            this->matrix->vectorProduct( vi, _M_tmp );
             this->preconditioner->solve( _M_tmp, w );
          }
          else
-             matrix->vectorProduct( vi, w );
+             this->matrix->vectorProduct( vi, w );
  
          //cout << " i = " << i << " vi = " << vi << std::endl;
 
@@ -320,16 +272,16 @@ solve( const Vector& b, Vector& x )
        */
       const RealType beta_old = beta;
       beta = 0.0;
-      if( preconditioner )
+      if( this->preconditioner )
       {
-         matrix -> vectorProduct( x, _M_tmp );
+         this->matrix->vectorProduct( x, _M_tmp );
          _M_tmp.addVector( b, ( RealType ) 1.0, -1.0 );
-         preconditioner -> solve( _M_tmp, _r );
+         this->preconditioner->solve( _M_tmp, _r );
          beta = _r.lpNorm( ( RealType ) 2.0 );
       }
       else
       {
-         matrix -> vectorProduct( x, _r );
+         this->matrix->vectorProduct( x, _r );
          _r.addVector( b, ( RealType ) 1.0, -1.0 );
          beta = _r.lpNorm( ( RealType ) 2.0 );
       }
@@ -347,11 +299,10 @@ solve( const Vector& b, Vector& x )
    return this->checkConvergence();
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
    template< typename VectorT >
 void
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 update( IndexType k,
         IndexType m,
         const Containers::Vector< RealType, Devices::Host, IndexType >& H,
@@ -383,10 +334,9 @@ update( IndexType k,
    }
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 generatePlaneRotation( RealType& dx,
                        RealType& dy,
                        RealType& cs,
@@ -412,10 +362,9 @@ generatePlaneRotation( RealType& dx,
       }
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 applyPlaneRotation( RealType& dx,
                     RealType& dy,
                     RealType& cs,
@@ -426,10 +375,9 @@ applyPlaneRotation( RealType& dx,
    dx = temp;
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-GMRES< Matrix, Preconditioner >::
+GMRES< Matrix >::
 setSize( IndexType _size, IndexType m )
 {
    if( size == _size && restarting_max == m )
diff --git a/src/TNL/Solvers/Linear/Jacobi.h b/src/TNL/Solvers/Linear/Jacobi.h
index 176eb625b54fb0c103f445531ec9144f726fac03..5288726713525cbc5911d497d219ee266bd89243 100644
--- a/src/TNL/Solvers/Linear/Jacobi.h
+++ b/src/TNL/Solvers/Linear/Jacobi.h
@@ -8,37 +8,30 @@
 
 #pragma once
 
-#include <TNL/Object.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
+#include "LinearSolver.h"
+
+#include <TNL/Containers/Vector.h>
 #include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix::RealType,
-                                                            typename Matrix::DeviceType,
-                                                            typename Matrix::IndexType> >
-class Jacobi : public Object,
-               public IterativeSolver< typename Matrix::RealType,
-                                       typename Matrix::IndexType >
+template< typename Matrix >
+class Jacobi
+: public LinearSolver< Matrix >
 {
-   public:
-
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-
-
-   Jacobi() : omega(0) {}
+   using Base = LinearSolver< Matrix >;
+public:
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const
    {
-      return String( "Jacobi< " ) + this->matrix->getType() + ", " +   this->preconditioner->getType() + " >";
+      return String( "Jacobi< " ) + this->matrix->getType() + ", " + this->preconditioner->getType() + " >";
    }
 
    static void configSetup( Config::ConfigDescription& config,
@@ -48,42 +41,32 @@ class Jacobi : public Object,
    }
 
    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" )
+               const String& prefix = "" ) override
    {
-      IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
       this->setOmega( parameters.getParameter< double >( prefix + "jacobi-omega" ) );
       if( this->omega <= 0.0 || this->omega > 2.0 )
       {
          std::cerr << "Warning: The Jacobi method parameter omega is out of interval (0,2). The value is " << this->omega << " the method will not converge." << std::endl;
       }
-      return true;
+      return LinearSolver< Matrix >::setup( parameters, prefix );
    }
 
-   void setOmega( const RealType& omega )
+   void setOmega( RealType omega )
    {
       this->omega = omega;
    }
 
-   const RealType& getOmega() const
+   RealType getOmega() const
    {
       return omega;
    }
 
-   void setMatrix( const MatrixType& matrix )
+   bool solve( ConstVectorViewType b, VectorViewType x ) override
    {
-      this->matrix = &matrix;
-   }
+      const IndexType size = this->matrix->getRows();
 
-   void setPreconditioner( const Preconditioner& preconditioner )
-   {
-      this->preconditioner = &preconditioner;
-   }
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector > >
-   bool solve( const Vector& b, Vector& x, Vector& aux)
-   {
-      const IndexType size = matrix->getRows();
+      Containers::Vector< RealType, DeviceType, IndexType > aux;
+      aux.setSize( size );
 
       this->resetIterations();
       this->setResidue( this->getConvergenceResidue() + 1.0 );
@@ -93,21 +76,18 @@ class Jacobi : public Object,
       while( this->nextIteration() )
       {
          for( IndexType row = 0; row < size; row ++ )
-            matrix->performJacobiIteration( b, row, x, aux, this->getOmega() );
-        for( IndexType row = 0; row < size; row ++ )
-            matrix->performJacobiIteration( b, row, aux, x, this->getOmega() );
-         this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+            this->matrix->performJacobiIteration( b, row, x, aux, this->getOmega() );
+         for( IndexType row = 0; row < size; row ++ )
+            this->matrix->performJacobiIteration( b, row, aux, x, this->getOmega() );
+         this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
       }
-      this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+      this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
       this->refreshSolverMonitor();
       return this->checkConvergence();
    }
 
-   protected:
-
-      RealType omega;
-      const MatrixType* matrix;
-      const PreconditionerType* preconditioner;
+protected:
+   RealType omega = 0.0;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/LinearResidueGetter.h b/src/TNL/Solvers/Linear/LinearResidueGetter.h
index be889504f1f06f87e59bd923f40f0fb999dff8ca..8a25c32092e86ff51f3a353aa240c3a9e4454662 100644
--- a/src/TNL/Solvers/Linear/LinearResidueGetter.h
+++ b/src/TNL/Solvers/Linear/LinearResidueGetter.h
@@ -12,22 +12,17 @@
 
 namespace TNL {
 namespace Solvers {
-namespace Linear {   
+namespace Linear {
 
-template< typename Matrix, typename Vector >
 class LinearResidueGetter
 {
-   public:
-
-      typedef Matrix MatrixType;
-      typedef typename MatrixType::RealType RealType;
-      typedef typename MatrixType::DeviceType DeviceType;
-      typedef typename MatrixType::IndexType IndexType;
-
-   static RealType getResidue( const Matrix& matrix,
-                               const Vector& x,
-                               const Vector& b,
-                               RealType bNorm = 0 );
+public:  // the class itself is no longer a template; only getResidue() is (defined in LinearResidueGetter_impl.h)
+   template< typename Matrix, typename Vector1, typename Vector2 >  // x and b may have different vector types (e.g. a view and a const view)
+   static typename Matrix::RealType
+   getResidue( const Matrix& matrix,
+               const Vector1& x,
+               const Vector2& b,
+               typename Matrix::RealType bNorm = 0 );  // bNorm: norm of b; when left at 0 the implementation computes b.lpNorm( 2.0 ) itself
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h b/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h
index db81e861f7cd850ed151e305eb81c6915def7f4f..987c3cfca85a84f742081647076e972651b59553 100644
--- a/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h
+++ b/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h
@@ -16,17 +16,20 @@
 
 namespace TNL {
 namespace Solvers {
-namespace Linear {   
+namespace Linear {
 
-template< typename Matrix, typename Vector>
-typename LinearResidueGetter< Matrix, Vector >::RealType
-LinearResidueGetter< Matrix, Vector >::
+template< typename Matrix, typename Vector1, typename Vector2 >
+typename Matrix::RealType
+LinearResidueGetter::
 getResidue( const Matrix& matrix,
-            const Vector& x,
-            const Vector& b,
-            RealType bNorm )
+            const Vector1& x,
+            const Vector2& b,
+            typename Matrix::RealType bNorm )
 {
-   const IndexType size = matrix.getRows();   
+   using RealType = typename Matrix::RealType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType size = matrix.getRows();
    RealType res( 0.0 );
    if( bNorm == 0.0 )
       bNorm = b.lpNorm( 2.0 );
diff --git a/src/TNL/Solvers/Linear/LinearSolver.h b/src/TNL/Solvers/Linear/LinearSolver.h
new file mode 100644
index 0000000000000000000000000000000000000000..7cf9e9665a456fa3dd325518124558c13110cb49
--- /dev/null
+++ b/src/TNL/Solvers/Linear/LinearSolver.h
@@ -0,0 +1,73 @@
+/***************************************************************************
+                          LinearSolver.h  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#include <type_traits>  // std::add_const
+#include <memory>  // std::shared_ptr
+
+#include <TNL/Solvers/IterativeSolver.h>
+#include <TNL/Solvers/Linear/Preconditioners/Preconditioner.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+
+template< typename Matrix >
+class LinearSolver  // Abstract base of the linear solvers: keeps the matrix and preconditioner pointers; subclasses implement solve().
+: public IterativeSolver< typename Matrix::RealType, typename Matrix::IndexType >
+{
+public:
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorViewType = Containers::VectorView< RealType, DeviceType, IndexType >;  // writable view, used for the solution x
+   using ConstVectorViewType = Containers::VectorView< typename std::add_const< RealType >::type, DeviceType, IndexType >;  // view with const elements, used for b
+   using MatrixType = Matrix;
+   using MatrixPointer = Pointers::SharedPointer< typename std::add_const< MatrixType >::type >;  // pointer to a const matrix
+   using PreconditionerType = Preconditioners::Preconditioner< MatrixType >;
+   using PreconditionerPointer = std::shared_ptr< typename std::add_const< PreconditionerType >::type >;  // pointer to a const preconditioner
+   // Hook for registering solver-specific configuration entries; the base class registers none (empty body).
+   static void configSetup( Config::ConfigDescription& config,
+                            const String& prefix = "" )
+   {}
+   // Base version only forwards to IterativeSolver::setup() and returns its result; subclasses may override it.
+   virtual bool setup( const Config::ParameterContainer& parameters,
+                       const String& prefix = "" )
+   {
+      return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
+   }
+   // Stores the pointer to the system matrix (copies the pointer, not the matrix).
+   void setMatrix( const MatrixPointer& matrix )
+   {
+      this->matrix = matrix;
+   }
+   // Stores the preconditioner pointer; the member stays nullptr unless this is called.
+   void setPreconditioner( const PreconditionerPointer& preconditioner )
+   {
+      this->preconditioner = preconditioner;
+   }
+   // Pure virtual: each concrete solver provides the solve for right-hand side b, writing the result through the view x.
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) = 0;
+   // Virtual destructor, matching the virtual setup()/solve() interface.
+   virtual ~LinearSolver() {}
+   // Both pointers default to nullptr until setMatrix()/setPreconditioner() assign them.
+protected:
+   MatrixPointer matrix = nullptr;
+   PreconditionerPointer preconditioner = nullptr;
+};
+
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
diff --git a/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt b/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt
index 33e88a03d4043692f41334bb239f16acc49aadd4..3fe13dd45ecc5328c044259dc498ef16621e78fc 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt
+++ b/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt
@@ -1,8 +1,10 @@
-SET( headers Dummy.h
+SET( headers Preconditioner.h
              Diagonal.h
              Diagonal_impl.h
              ILU0.h
              ILU0_impl.h
+             ILUT.h
+             ILUT_impl.h
    )
-   
+
 INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Solvers/Linear/Preconditioners )
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h
index ecef1953923b3705de08ea5dd3c4d544dc6fd184..fb5f534f1a26ea35ac483b4ba68b2c61a94b6780 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h
@@ -12,7 +12,8 @@
 
 #pragma once
 
-#include <TNL/Object.h>
+#include "Preconditioner.h"
+
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -20,27 +21,29 @@ namespace Solvers {
 namespace Linear {
 namespace Preconditioners {
 
-template< typename Real, typename Device, typename Index >
+template< typename Matrix >
 class Diagonal
+: public Preconditioner< Matrix >
 {
-   public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef Containers::Vector< Real, Device, Index > VectorType;
+public:
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
+   using VectorType = Containers::Vector< RealType, DeviceType, IndexType >;
 
-   template< typename Matrix >
-   void update( const Matrix& matrix );
+   virtual void update( const MatrixPointer& matrixPointer ) override;
 
-   template< typename Vector1, typename Vector2 >
-   bool solve( const Vector1& b, Vector2& x ) const;
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override;
 
    String getType() const
    {
       return String( "Diagonal" );
    }
 
-   protected:
+protected:
    VectorType diagonal;
 };
 
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
index 0c77239443c054c4429416c5c654ab53d8419a9e..897eb006ae87b17c22f56cc135f444ecf80fb420 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
@@ -17,7 +17,7 @@
 namespace TNL {
 namespace Solvers {
 namespace Linear {
-namespace Preconditioners {   
+namespace Preconditioners {
 
 #ifdef HAVE_CUDA
 template< typename Real, typename Index, typename Matrix >
@@ -49,37 +49,36 @@ __global__ void elementwiseVectorDivisionKernel( const Real* left,
 }
 #endif
 
-template< typename Real, typename Device, typename Index >
-   template< typename MatrixPointer >
+template< typename Matrix >
 void
-Diagonal< Real, Device, Index >::
-update( const MatrixPointer& matrix )
+Diagonal< Matrix >::
+update( const MatrixPointer& matrixPointer )
 {
 //  std::cout << getType() << "->setMatrix()" << std::endl;
 
-   TNL_ASSERT_GT( matrix->getRows(), 0, "empty matrix" );
-   TNL_ASSERT_EQ( matrix->getRows(), matrix->getColumns(), "matrix must be square" );
+   TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
+   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );
 
-   if( diagonal.getSize() != matrix->getRows() )
-      diagonal.setSize( matrix->getRows() );
+   if( diagonal.getSize() != matrixPointer->getRows() )
+      diagonal.setSize( matrixPointer->getRows() );
 
    if( std::is_same< DeviceType, Devices::Host >::value )
    {
       for( int i = 0; i < diagonal.getSize(); i++ ) {
-         diagonal[ i ] = matrix->getElement( i, i );
+         diagonal[ i ] = matrixPointer->getElement( i, i );
       }
    }
    if( std::is_same< DeviceType, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      const Index& size = diagonal.getSize();
+      const IndexType& size = diagonal.getSize();
       dim3 cudaBlockSize( 256 );
       dim3 cudaBlocks;
-      cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );      
+      cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
 
       Devices::Cuda::synchronizeDevice();
       matrixDiagonalToVectorKernel<<< cudaBlocks, cudaBlockSize >>>(
-            &matrix.template getData< Devices::Cuda >(),
+            &matrixPointer.template getData< Devices::Cuda >(),
             diagonal.getData(),
             size );
       TNL_CHECK_CUDA_DEVICE;
@@ -87,11 +86,10 @@ update( const MatrixPointer& matrix )
    }
 }
 
-template< typename Real, typename Device, typename Index >
-   template< typename Vector1, typename Vector2 >
+template< typename Matrix >
 bool
-Diagonal< Real, Device, Index >::
-solve( const Vector1& b, Vector2& x ) const
+Diagonal< Matrix >::
+solve( ConstVectorViewType b, VectorViewType x ) const
 {
    if( std::is_same< DeviceType, Devices::Host >::value )
    {
@@ -102,10 +100,10 @@ solve( const Vector1& b, Vector2& x ) const
    if( std::is_same< DeviceType, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      const Index& size = diagonal.getSize();
+      const IndexType& size = diagonal.getSize();
       dim3 cudaBlockSize( 256 );
       dim3 cudaBlocks;
-      cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );      
+      cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
 
       elementwiseVectorDivisionKernel<<< cudaBlocks, cudaBlockSize >>>(
             b.getData(),
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Dummy.h b/src/TNL/Solvers/Linear/Preconditioners/Dummy.h
deleted file mode 100644
index 2a7283b22bd31f881b2d8a320b09ddc50b3bbe8a..0000000000000000000000000000000000000000
--- a/src/TNL/Solvers/Linear/Preconditioners/Dummy.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/***************************************************************************
-                          Dummy.h  -  description
-                             -------------------
-    begin                : Oct 19, 2012
-    copyright            : (C) 2012 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
-
-namespace TNL {
-namespace Solvers {
-namespace Linear {
-namespace Preconditioners {   
-
-template< typename Real, typename Device, typename Index >
-class Dummy
-{
-   public:
-
-   template< typename Matrix >
-   void update( const Matrix& matrix ) {}
-
-   template< typename Vector1, typename Vector2 >
-   bool solve( const Vector1& b, Vector2& x ) const
-   {
-      TNL_ASSERT_TRUE( false, "The solve() method of a dummy preconditioner should not be called." );
-      return true;
-   }
-
-   String getType() const
-   {
-      return String( "Dummy" );
-   }
-};
-
-template< typename LinearSolver, typename Preconditioner >
-class SolverStarterSolverPreconditionerSetter
-{
-   public:
-       
-      static void run( LinearSolver& solver,
-                       SharedPointer< Preconditioner, typename LinearSolver::DeviceType >& preconditioner )
-      {
-         solver.setPreconditioner( preconditioner );
-      }
-};
-
-template< typename LinearSolver, typename Real, typename Device, typename Index >
-class SolverStarterSolverPreconditionerSetter< LinearSolver, Dummy< Real, Device, Index > >
-{
-   public:
-
-      typedef Device DeviceType;
-      typedef Dummy< Real, DeviceType, Index > PreconditionerType;
-   
-      static void run( LinearSolver& solver,
-                       SharedPointer< PreconditionerType, typename LinearSolver::DeviceType >& preconditioner )
-      {
-         // do nothing
-      }
-};
-
-} // namespace Preconditioners
-} // namespace Linear
-} // namespace Solvers
-} // namespace TNL
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
index cf44431849bf5732a2fd87efd664dccc0c6f5fc0..b94d7905263a353a40faa5be495486fa00625e6b 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
@@ -14,7 +14,8 @@
 
 #include <type_traits>
 
-#include <TNL/Object.h>
+#include "Preconditioner.h"
+
 #include <TNL/Containers/Vector.h>
 #include <TNL/Matrices/CSR.h>
 
@@ -27,62 +28,68 @@ namespace Solvers {
 namespace Linear {
 namespace Preconditioners {
 
-template< typename Real, typename Device, typename Index >
-class ILU0
+// implementation template
+template< typename Matrix, typename Real, typename Device, typename Index >
+class ILU0_impl
 {};
 
-template< typename Real, typename Index >
-class ILU0< Real, Devices::Host, Index >
+// actual template to be used by users
+template< typename Matrix >
+class ILU0
+: public ILU0_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >
 {
 public:
-   typedef Real RealType;
-   typedef Devices::Host DeviceType;
-   typedef Index IndexType;
-
-   template< typename MatrixPointer >
-   void update( const MatrixPointer& matrixPointer );
-
-   template< typename Vector1, typename Vector2 >
-   bool solve( const Vector1& b, Vector2& x ) const;
-
    String getType() const
    {
       return String( "ILU0" );
    }
+};
+
+template< typename Matrix, typename Real, typename Index >
+class ILU0_impl< Matrix, Real, Devices::Host, Index >
+: public Preconditioner< Matrix >
+{
+public:
+   using RealType = Real;
+   using DeviceType = Devices::Host;
+   using IndexType = Index;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
+
+   virtual void update( const MatrixPointer& matrixPointer ) override;
+
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override;
 
 protected:
-//   Matrices::CSR< RealType, DeviceType, IndexType > A;
    Matrices::CSR< RealType, DeviceType, IndexType > L;
    Matrices::CSR< RealType, DeviceType, IndexType > U;
 };
 
-template<>
-class ILU0< double, Devices::Cuda, int >
+template< typename Matrix >
+class ILU0_impl< Matrix, double, Devices::Cuda, int >
+: public Preconditioner< Matrix >
 {
 public:
    using RealType = double;
    using DeviceType = Devices::Cuda;
    using IndexType = int;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
 
-   ILU0()
+   ILU0_impl()
    {
 #if defined(HAVE_CUDA) && defined(HAVE_CUSPARSE)
       cusparseCreate( &handle );
 #endif
    }
 
-   template< typename MatrixPointer >
-   void update( const MatrixPointer& matrixPointer );
+   virtual void update( const MatrixPointer& matrixPointer ) override;
 
-   template< typename Vector1, typename Vector2 >
-   bool solve( const Vector1& b, Vector2& x ) const;
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override;
 
-   String getType() const
-   {
-      return String( "ILU0" );
-   }
-
-   ~ILU0()
+   ~ILU0_impl()
    {
 #if defined(HAVE_CUDA) && defined(HAVE_CUSPARSE)
       resetMatrices();
@@ -145,59 +152,48 @@ protected:
    }
 
    // TODO: extend Matrices::copySparseMatrix accordingly
-   template< typename Matrix,
-             typename = typename std::enable_if< ! std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type >
-   void copyMatrix( const Matrix& matrix )
+   template< typename MatrixT,
+             typename = typename std::enable_if< ! std::is_same< DeviceType, typename MatrixT::DeviceType >::value >::type >
+   void copyMatrix( const MatrixT& matrix )
    {
-      typename Matrix::CudaType A_tmp;
+      typename MatrixT::CudaType A_tmp;
       A_tmp = matrix;
       Matrices::copySparseMatrix( A, A_tmp );
    }
 
-   template< typename Matrix,
-             typename = typename std::enable_if< std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type,
+   template< typename MatrixT,
+             typename = typename std::enable_if< std::is_same< DeviceType, typename MatrixT::DeviceType >::value >::type,
              typename = void >
-   void copyMatrix( const Matrix& matrix )
+   void copyMatrix( const MatrixT& matrix )
    {
       Matrices::copySparseMatrix( A, matrix );
    }
 #endif
 };
 
-#ifdef HAVE_MIC
-template< typename Real, typename Index >
-class ILU0< Real, Devices::MIC, Index >
+template< typename Matrix, typename Real, typename Index >
+class ILU0_impl< Matrix, Real, Devices::MIC, Index >
+: public Preconditioner< Matrix >
 {
 public:
-   typedef Real RealType;
-   typedef Devices::MIC DeviceType;
-   typedef Index IndexType;
-
-   template< typename MatrixPointer >
-   void update( const MatrixPointer& matrixPointer )
+   using RealType = Real;
+   using DeviceType = Devices::MIC;
+   using IndexType = Index;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
+
+   virtual void update( const MatrixPointer& matrixPointer ) override
    {
       throw std::runtime_error("Not Iplemented yet for MIC");
    }
 
-   template< typename Vector1, typename Vector2 >
-   bool solve( const Vector1& b, Vector2& x ) const
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override
    {
       throw std::runtime_error("Not Iplemented yet for MIC");
    }
-
-   String getType() const
-   {
-      return String( "ILU0" );
-   }
-
-protected:
-//   Matrices::CSR< RealType, DeviceType, IndexType > A;
-   Matrices::CSR< RealType, DeviceType, IndexType > L;
-   Matrices::CSR< RealType, DeviceType, IndexType > U;
 };
 
-#endif
-
 } // namespace Preconditioners
 } // namespace Linear
 } // namespace Solvers
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
index 0ce43ed8b6a9598e97ad1221e5ebee10f94bfcb2..2b2244df365cacfc0779b037b79c80cbd833b784 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
@@ -19,12 +19,11 @@
 namespace TNL {
 namespace Solvers {
 namespace Linear {
-namespace Preconditioners {   
+namespace Preconditioners {
 
-template< typename Real, typename Index >
-   template< typename MatrixPointer >
+template< typename Matrix, typename Real, typename Index >
 void
-ILU0< Real, Devices::Host, Index >::
+ILU0_impl< Matrix, Real, Devices::Host, Index >::
 update( const MatrixPointer& matrixPointer )
 {
    TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
@@ -79,7 +78,7 @@ update( const MatrixPointer& matrixPointer )
          const auto L_i = L.getRow( i );
          const auto U_i = U.getRow( N - 1 - i );
 
-         // loop for k = 0, ..., i - 2; but only over the non-zero entries
+         // loop for k = 0, ..., i - 1; but only over the non-zero entries
          for( IndexType c_k = 0; c_k < L_entries; c_k++ ) {
             const auto k = L_i.getElementColumn( c_k );
 
@@ -103,11 +102,10 @@ update( const MatrixPointer& matrixPointer )
    }
 }
 
-template< typename Real, typename Index >
-   template< typename Vector1, typename Vector2 >
+template< typename Matrix, typename Real, typename Index >
 bool
-ILU0< Real, Devices::Host, Index >::
-solve( const Vector1& b, Vector2& x ) const
+ILU0_impl< Matrix, Real, Devices::Host, Index >::
+solve( ConstVectorViewType b, VectorViewType x ) const
 {
    TNL_ASSERT_EQ( b.getSize(), L.getRows(), "wrong size of the right hand side" );
    TNL_ASSERT_EQ( x.getSize(), L.getRows(), "wrong size of the solution vector" );
@@ -154,9 +152,9 @@ solve( const Vector1& b, Vector2& x ) const
 }
 
 
-   template< typename MatrixPointer >
+template< typename Matrix >
 void
-ILU0< double, Devices::Cuda, int >::
+ILU0_impl< Matrix, double, Devices::Cuda, int >::
 update( const MatrixPointer& matrixPointer )
 {
 #ifdef HAVE_CUDA
@@ -263,10 +261,10 @@ update( const MatrixPointer& matrixPointer )
 #endif
 }
 
-   template< typename Vector1, typename Vector2 >
+template< typename Matrix >
 bool
-ILU0< double, Devices::Cuda, int >::
-solve( const Vector1& b, Vector2& x ) const
+ILU0_impl< Matrix, double, Devices::Cuda, int >::
+solve( ConstVectorViewType b, VectorViewType x ) const
 {
 #ifdef HAVE_CUDA
 #ifdef HAVE_CUSPARSE
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
new file mode 100644
index 0000000000000000000000000000000000000000..f9147c7b88972bc2d4e409fe08c14614ed8bc274
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
@@ -0,0 +1,129 @@
+/***************************************************************************
+                          ILUT.h  -  description
+                             -------------------
+    begin                : Aug 31, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include "Preconditioner.h"
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/CSR.h>
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+namespace Preconditioners {
+
+// implementation template
+template< typename Matrix, typename Real, typename Device, typename Index >
+class ILUT_impl
+{};
+
+// actual template to be used by users
+template< typename Matrix >
+class ILUT
+: public ILUT_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >
+{
+public:
+   String getType() const  // short name of this preconditioner
+   {
+      return String( "ILUT" );
+   }
+
+   static void configSetup( Config::ConfigDescription& config,  // registers the "ilut-p" and "ilut-threshold" options under the given prefix
+                            const String& prefix = "" )
+   {
+      config.addEntry< int >( prefix + "ilut-p", "Number of additional non-zero entries to allocate on each row of the factors L and U.", 0 );
+      config.addEntry< double >( prefix + "ilut-threshold", "Threshold for dropping small entries.", 1e-4 );
+   }
+};
+
+template< typename Matrix, typename Real, typename Index >
+class ILUT_impl< Matrix, Real, Devices::Host, Index >
+: public Preconditioner< Matrix >
+{
+public:
+   using RealType = Real;
+   using DeviceType = Devices::Host;
+   using IndexType = Index;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
+   using VectorType = Containers::Vector< RealType, DeviceType, IndexType >;
+
+   bool setup( const Config::ParameterContainer& parameters,
+               const String& prefix = "" ) override;
+
+   virtual void update( const MatrixPointer& matrixPointer ) override;
+
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override;
+
+protected:
+   Index p = 0;
+   Real tau = 1e-4;
+
+   // The factors L and U are stored separately and the rows of U are reversed.
+   Matrices::CSR< RealType, DeviceType, IndexType > L;
+   Matrices::CSR< RealType, DeviceType, IndexType > U;
+};
+
+template< typename Matrix, typename Real, typename Index >
+class ILUT_impl< Matrix, Real, Devices::Cuda, Index >  // placeholder specialization: both operations throw
+: public Preconditioner< Matrix >
+{
+public:
+   using RealType = Real;
+   using DeviceType = Devices::Cuda;
+   using IndexType = Index;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
+
+   virtual void update( const MatrixPointer& matrixPointer ) override
+   {
+      throw std::runtime_error("Not Implemented yet for CUDA");
+   }
+
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override
+   {
+      throw std::runtime_error("Not Implemented yet for CUDA");
+   }
+};
+
+template< typename Matrix, typename Real, typename Index >
+class ILUT_impl< Matrix, Real, Devices::MIC, Index >  // placeholder specialization: both operations throw
+: public Preconditioner< Matrix >
+{
+public:
+   using RealType = Real;
+   using DeviceType = Devices::MIC;
+   using IndexType = Index;
+   using typename Preconditioner< Matrix >::VectorViewType;
+   using typename Preconditioner< Matrix >::ConstVectorViewType;
+   using typename Preconditioner< Matrix >::MatrixPointer;
+
+   virtual void update( const MatrixPointer& matrixPointer ) override
+   {
+      throw std::runtime_error("Not Implemented yet for MIC");
+   }
+
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override
+   {
+      throw std::runtime_error("Not Implemented yet for MIC");
+   }
+};
+
+} // namespace Preconditioners
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
+
+#include <TNL/Solvers/Linear/Preconditioners/ILUT_impl.h>
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb08ffb1440d79ea8b4e9b354e7088b0b3194245
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h
@@ -0,0 +1,299 @@
+/***************************************************************************
+                          ILUT_impl.h  -  description
+                             -------------------
+    begin                : Aug 31, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky
+
+#pragma once
+
+#include <vector>
+
+#include "ILUT.h"
+#include <TNL/Timer.h>
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+namespace Preconditioners {
+
+template< typename Matrix, typename Real, typename Index >
+bool
+ILUT_impl< Matrix, Real, Devices::Host, Index >::
+setup( const Config::ParameterContainer& parameters,  // reads the ILUT options registered by ILUT::configSetup()
+       const String& prefix )                         // same prefix that was used when the options were registered
+{
+   p = parameters.getParameter< int >( prefix + "ilut-p" );  // additional entries per row of L and U
+   tau = parameters.getParameter< double >( prefix + "ilut-threshold" );  // dropping threshold, scaled by the row norm in update()
+   return true;
+}
+
+template< typename Matrix, typename Real, typename Index >
+void
+ILUT_impl< Matrix, Real, Devices::Host, Index >::
+update( const MatrixPointer& matrixPointer )  // computes the factors L and U from the given square matrix; row i of U is stored as row N - 1 - i
+{
+   TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
+   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );
+
+   const IndexType N = matrixPointer->getRows();
+
+   L.setDimensions( N, N );
+   U.setDimensions( N, N );
+
+   Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU;
+
+   timer_total.start();
+
+   // compute row lengths
+   timer_rowlengths.start();
+   typename decltype(L)::CompressedRowLengthsVector L_rowLengths;
+   typename decltype(U)::CompressedRowLengthsVector U_rowLengths;
+   L_rowLengths.setSize( N );
+   U_rowLengths.setSize( N );
+   for( IndexType i = 0; i < N; i++ ) {
+      const auto row = matrixPointer->getRow( i );
+      const auto max_length = matrixPointer->getRowLength( i );
+      IndexType L_entries = 0;
+      IndexType U_entries = 0;
+      for( IndexType j = 0; j < max_length; j++ ) {
+         const auto column = row.getElementColumn( j );
+         if( column < i )
+            L_entries++;
+         else if( column < N )
+            U_entries++;
+         else
+            break;
+      }
+      // store p additional entries in each factor
+      L_rowLengths[ i ] = L_entries + p;
+      U_rowLengths[ N - 1 - i ] = U_entries + p;
+   }
+   L.setCompressedRowLengths( L_rowLengths );
+   U.setCompressedRowLengths( U_rowLengths );
+   timer_rowlengths.stop();
+
+   // intermediate full vector for the i-th row of A
+   VectorType w;
+   w.setSize( N );
+   w.setValue( 0.0 );
+
+   // intermediate vectors for sorting and keeping only the largest values
+//   using Pair = std::pair< IndexType, RealType >;
+   struct Triplet {
+      IndexType column;
+      RealType value;
+      RealType abs_value;
+      Triplet(IndexType column, RealType value, RealType abs_value) : column(column), value(value), abs_value(abs_value) {}
+   };
+   auto cmp_abs_value = []( const Triplet& a, const Triplet& b ){ return a.abs_value < b.abs_value; };
+   std::vector< Triplet > heap_L, heap_U;
+   auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; };
+   std::vector< Triplet > values_L, values_U;
+
+//   std::cout << "N = " << N << std::endl;
+
+   // Incomplete LU factorization with threshold
+   // (see Saad - Iterative methods for sparse linear systems, section 10.4)
+   for( IndexType i = 0; i < N; i++ ) {
+      const auto max_length = matrixPointer->getRowLength( i );
+      const auto A_i = matrixPointer->getRow( i );
+
+      RealType A_i_norm = 0.0;
+
+      // copy A_i into the full vector w
+      timer_copy_into_w.start();
+      for( IndexType c_j = 0; c_j < max_length; c_j++ ) {
+         const auto j = A_i.getElementColumn( c_j );
+         // handle ellpack dummy entries
+         if( j >= N ) break;
+         w[ j ] = A_i.getElementValue( c_j );
+
+         // running computation of norm
+         A_i_norm += w[ j ] * w[ j ];
+      }
+      timer_copy_into_w.stop();
+
+      // compute relative tolerance
+      A_i_norm = std::sqrt( A_i_norm );
+      const RealType tau_i = tau * A_i_norm;
+
+      // loop for k = 0, ..., i - 1; but only over the non-zero entries of w
+      timer_k_loop.start();
+      for( IndexType k = 0; k < i; k++ ) {
+         RealType w_k = w[ k ];
+         if( w_k == 0.0 )
+            continue;
+
+         w_k /= matrixPointer->getElementFast( k, k );  // NOTE(review): divides by the diagonal of A, whereas the referenced ILUT algorithm divides by u_kk of the computed factor - confirm this is intended
+
+         // apply dropping rule to w_k
+         if( std::abs( w_k ) < tau_i )
+            w_k = 0.0;
+
+         w[ k ] = w_k;
+
+         if( w_k != 0.0 ) {
+            // w := w - w_k * U_k
+            const auto U_k = U.getRow( N - 1 - k );
+            // loop for j = 0, ..., N-1; but only over the non-zero entries
+            for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) {
+               const auto j = U_k.getElementColumn( c_j );
+               // skip dropped entries
+               if( j >= N ) break;
+               w[ j ] -= w_k * U_k.getElementValue( c_j );
+            }
+         }
+      }
+      timer_k_loop.stop();
+
+      // apply dropping rule to the row w
+      // (we drop all values under threshold and keep nl(i) + p largest values in L
+      // and nu(i) + p largest values in U; see Saad (2003) for reference)
+      // TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary)
+      timer_dropping.start();
+      for( IndexType j = 0; j < N; j++ ) {
+         const RealType w_j_abs = std::abs( w[ j ] );
+         // ignore small values (NOTE(review): this also applies to the diagonal entry j == i, which solve() reads as the first entry of the U row - confirm it can never be dropped)
+         if( w_j_abs < tau_i )
+            continue;
+         // push into the heaps for L or U
+         if( j < i ) {
+            heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) );
+            std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
+         }
+         else {
+            heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) );
+            std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
+         }
+      }
+      // extract values for L and U
+      for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) {
+         // move the largest to the end
+         std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
+         // move the triplet from one vector into another
+         const auto largest = heap_L.back();
+         heap_L.pop_back();
+         values_L.push_back( largest );
+      }
+      for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) {
+         // move the largest to the end
+         std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
+         // move the triplet from one vector into another
+         const auto largest = heap_U.back();
+         heap_U.pop_back();
+         values_U.push_back( largest );
+      }
+      // sort by column index to make it insertable into the sparse matrix
+      std::sort( values_L.begin(), values_L.end(), cmp_column );
+      std::sort( values_U.begin(), values_U.end(), cmp_column );
+      timer_dropping.stop();
+
+//      std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl;
+
+      timer_copy_into_LU.start();
+
+      // the row L_i might be empty
+      if( values_L.size() ) {
+         // L_ij = w_j for j = 0, ..., i - 1
+         auto L_i = L.getRow( i );
+         for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) {
+            const auto j = values_L[ c_j ].column;
+//            std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
+            L_i.setElement( c_j, j, values_L[ c_j ].value );
+         }
+      }
+
+      // U_ij = w_j for j = i, ..., N - 1
+      auto U_i = U.getRow( N - 1 - i );
+      for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) {
+         const auto j = values_U[ c_j ].column;
+//         std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
+         U_i.setElement( c_j, j, values_U[ c_j ].value );
+      }
+
+      timer_copy_into_LU.stop();
+
+      // reset w
+      w.setValue( 0.0 );
+
+      heap_L.clear();
+      heap_U.clear();
+      values_L.clear();
+      values_U.clear();
+   }
+
+   timer_total.stop();
+
+   std::cout << "ILUT::update statistics:\n";  // NOTE(review): these timings are printed to stdout unconditionally on every update() call
+   std::cout << "\ttimer_total:        " << timer_total.getRealTime()         << " s\n";
+   std::cout << "\ttimer_rowlengths:   " << timer_rowlengths.getRealTime()    << " s\n";
+   std::cout << "\ttimer_copy_into_w:  " << timer_copy_into_w.getRealTime()   << " s\n";
+   std::cout << "\ttimer_k_loop:       " << timer_k_loop.getRealTime()        << " s\n";
+   std::cout << "\ttimer_dropping:     " << timer_dropping.getRealTime()      << " s\n";
+   std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime()  << " s\n";
+   std::cout << std::flush;
+}
+
+template< typename Matrix, typename Real, typename Index >
+bool
+ILUT_impl< Matrix, Real, Devices::Host, Index >::
+solve( ConstVectorViewType b, VectorViewType x ) const  // applies the preconditioner: solves L*U*x = b using the factors built by update(); always returns true
+{
+   TNL_ASSERT_EQ( b.getSize(), L.getRows(), "wrong size of the right hand side" );
+   TNL_ASSERT_EQ( x.getSize(), L.getRows(), "wrong size of the solution vector" );
+
+   const IndexType N = x.getSize();
+
+   // Step 1: solve y from Ly = b (forward substitution; L stores only the strictly lower part, its unit diagonal is implicit; y is written into x)
+   for( IndexType i = 0; i < N; i++ ) {
+      x[ i ] = b[ i ];
+
+      const auto L_entries = L.getRowLength( i );
+
+      // this condition is to avoid segfaults on empty L.getRow( i )
+      if( L_entries > 0 ) {
+         const auto L_i = L.getRow( i );
+
+         // loop for j = 0, ..., i - 1; but only over the non-zero entries
+         for( IndexType c_j = 0; c_j < L_entries; c_j++ ) {
+            const auto j = L_i.getElementColumn( c_j );
+            // we might have allocated more space than was actually needed due to dropping
+            if( j >= N ) break;
+            x[ i ] -= L_i.getElementValue( c_j ) * x[ j ];
+         }
+      }
+   }
+
+   // Step 2: solve x from Ux = y (backward substitution, performed in place on x)
+   for( IndexType i = N - 1; i >= 0; i-- ) {  // NOTE(review): the condition i >= 0 terminates only for a signed IndexType
+      const IndexType U_idx = N - 1 - i;  // the rows of U are stored in reversed order
+
+      const auto U_entries = U.getRowLength( U_idx );
+      const auto U_i = U.getRow( U_idx );
+
+      const auto U_ii = U_i.getElementValue( 0 );  // NOTE(review): assumes the diagonal is the first stored entry of the row, i.e. that update() did not drop it - confirm
+
+      // loop for j = i+1, ..., N-1; but only over the non-zero entries
+      for( IndexType c_j = 1; c_j < U_entries ; c_j++ ) {
+         const auto j = U_i.getElementColumn( c_j );
+         // we might have allocated more space than was actually needed due to dropping
+         if( j >= N ) break;
+         x[ i ] -= U_i.getElementValue( c_j ) * x[ j ];
+      }
+
+      x[ i ] /= U_ii;
+   }
+
+   return true;
+}
+
+} // namespace Preconditioners
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h
new file mode 100644
index 0000000000000000000000000000000000000000..cda81802da2c92c8e23d5e42537196a0625d0b2e
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h
@@ -0,0 +1,66 @@
+/***************************************************************************
+                          Preconditioner.h  -  description
+                             -------------------
+    begin                : Oct 19, 2012
+    copyright            : (C) 2012 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>  // std::add_const
+
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Config/ParameterContainer.h>
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+namespace Preconditioners {
+
+template< typename Matrix >
+class Preconditioner
+{
+public:
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorViewType = Containers::VectorView< RealType, DeviceType, IndexType >;
+   using ConstVectorViewType = Containers::VectorView< typename std::add_const< RealType >::type, DeviceType, IndexType >;
+   using MatrixType = Matrix;
+   using MatrixPointer = Pointers::SharedPointer< typename std::add_const< MatrixType >::type >;
+
+   static void configSetup( Config::ConfigDescription& config,
+                            const String& prefix = "" )
+   {}
+
+   virtual bool setup( const Config::ParameterContainer& parameters,
+                       const String& prefix = "" )
+   {
+      return true;
+   }
+
+   virtual void update( const MatrixPointer& matrixPointer )
+   {}
+
+   virtual bool solve( ConstVectorViewType b, VectorViewType x ) const
+   {
+      TNL_ASSERT_TRUE( false, "The solve() method of a dummy preconditioner should not be called." );
+      return true;
+   }
+
+   String getType() const
+   {
+      return String( "Preconditioner" );
+   }
+
+   virtual ~Preconditioner() {}
+};
+
+} // namespace Preconditioners
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
diff --git a/src/TNL/Solvers/Linear/SOR.h b/src/TNL/Solvers/Linear/SOR.h
index fe36b65932aa0e4eed149a3e1f46a6acc9560c73..7e94634cdf178abf9df838e533529c44ea5fb2cc 100644
--- a/src/TNL/Solvers/Linear/SOR.h
+++ b/src/TNL/Solvers/Linear/SOR.h
@@ -10,35 +10,23 @@
 
 #pragma once
 
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
+#include "LinearSolver.h"
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
-class SOR : public Object,
-            public IterativeSolver< typename Matrix :: RealType,
-                                    typename Matrix :: IndexType >
+template< typename Matrix >
+class SOR
+: public LinearSolver< Matrix >
 {
-   public:
-
-   typedef typename Matrix :: RealType RealType;
-   typedef typename Matrix :: IndexType IndexType;
-   typedef typename Matrix :: DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
-   SOR();
+   using Base = LinearSolver< Matrix >;
+public:
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const;
 
@@ -46,26 +34,16 @@ class SOR : public Object,
                             const String& prefix = "" );
 
    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
+               const String& prefix = "" ) override;
 
    void setOmega( const RealType& omega );
 
    const RealType& getOmega() const;
 
-   void setMatrix( const MatrixPointer& matrix );
-
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
-   bool solve( const Vector& b, Vector& x );
-
-   protected:
-
-   RealType omega;
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
-   MatrixPointer matrix;
-   PreconditionerPointer preconditioner;
+protected:
+   RealType omega = 1.0;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/SOR_impl.h b/src/TNL/Solvers/Linear/SOR_impl.h
index b0b4361452ba974b9191da673152b3fd4fabf7fb..b0671e583e44d63c9a7e68a228f7e3bee4ad81b0 100644
--- a/src/TNL/Solvers/Linear/SOR_impl.h
+++ b/src/TNL/Solvers/Linear/SOR_impl.h
@@ -11,90 +11,59 @@
 #pragma once
 
 #include <TNL/Solvers/Linear/SOR.h>
+#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
-namespace Linear {   
+namespace Linear {
 
-template< typename Matrix, typename Preconditioner >
-SOR< Matrix, Preconditioner > :: SOR()
-: omega( 1.0 )
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();   
-}
-
-template< typename Matrix, typename Preconditioner >
-String SOR< Matrix, Preconditioner > :: getType() const
+template< typename Matrix >
+String SOR< Matrix > :: getType() const
 {
    return String( "SOR< " ) +
           this->matrix -> getType() + ", " +
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 void
-SOR< Matrix, Preconditioner >::
+SOR< Matrix >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
    config.addEntry< double >( prefix + "sor-omega", "Relaxation parameter of the SOR method.", 1.0 );
 }
 
-template< typename Matrix,
-          typename Preconditioner >
+template< typename Matrix >
 bool
-SOR< Matrix, Preconditioner >::
+SOR< Matrix >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
-   IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
    this->setOmega( parameters.getParameter< double >( prefix + "sor-omega" ) );
    if( this->omega <= 0.0 || this->omega > 2.0 )
    {
       std::cerr << "Warning: The SOR method parameter omega is out of interval (0,2). The value is " << this->omega << " the method will not converge." << std::endl;
    }
-   return true;
+   return LinearSolver< Matrix >::setup( parameters, prefix );
 }
 
-
-template< typename Matrix, typename Preconditioner >
-void SOR< Matrix, Preconditioner > :: setOmega( const RealType& omega )
+template< typename Matrix >
+void SOR< Matrix > :: setOmega( const RealType& omega )
 {
    this->omega = omega;
 }
 
-template< typename Matrix, typename Preconditioner >
-const typename SOR< Matrix, Preconditioner > :: RealType& SOR< Matrix, Preconditioner > :: getOmega( ) const
+template< typename Matrix >
+const typename SOR< Matrix > :: RealType& SOR< Matrix > :: getOmega( ) const
 {
    return this->omega;
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void SOR< Matrix, Preconditioner > :: setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-           typename Preconditioner >
-void SOR< Matrix, Preconditioner > :: setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
-}
-
-
-template< typename Matrix, typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
-bool SOR< Matrix, Preconditioner > :: solve( const Vector& b, Vector& x )
+template< typename Matrix >
+bool SOR< Matrix > :: solve( ConstVectorViewType b, VectorViewType x )
 {
-   const IndexType size = matrix -> getRows();   
+   const IndexType size = this->matrix->getRows();
 
    this->resetIterations();
    this->setResidue( this->getConvergenceResidue() + 1.0 );
@@ -104,15 +73,12 @@ bool SOR< Matrix, Preconditioner > :: solve( const Vector& b, Vector& x )
    while( this->nextIteration() )
    {
       for( IndexType row = 0; row < size; row ++ )
-         matrix->performSORIteration( b,
-                                      row,
-                                      x,
-                                      this->getOmega() );
+         this->matrix->performSORIteration( b, row, x, this->getOmega() );
       // FIXME: the LinearResidueGetter works only on the host
-      this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+      this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
       this->refreshSolverMonitor();
    }
-   this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+   this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
    this->refreshSolverMonitor( true );
    return this->checkConvergence();
 };
diff --git a/src/TNL/Solvers/Linear/TFQMR.h b/src/TNL/Solvers/Linear/TFQMR.h
index abf37d737298e27395c1dbb4015f398582cf17a2..e693032a3f4f834f73d76a5952693fff01338891 100644
--- a/src/TNL/Solvers/Linear/TFQMR.h
+++ b/src/TNL/Solvers/Linear/TFQMR.h
@@ -10,65 +10,36 @@
 
 #pragma once
 
-#include <math.h>
-#include <TNL/Object.h>
-#include <TNL/SharedPointer.h>
+#include "LinearSolver.h"
+
 #include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
-
-class TFQMR : public Object,
-              public IterativeSolver< typename Matrix :: RealType,
-                                      typename Matrix :: IndexType >
+template< typename Matrix >
+class TFQMR
+: public LinearSolver< Matrix >
 {
-   public:
-
-   typedef typename Matrix::RealType RealType;
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::DeviceType DeviceType;
-   typedef Matrix MatrixType;
-   typedef Preconditioner PreconditionerType;
-   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
-   TFQMR();
+   using Base = LinearSolver< Matrix >;
+public:
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
    String getType() const;
 
-   static void configSetup( Config::ConfigDescription& config,
-                            const String& prefix = "" );
-
-   bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
-
-   void setMatrix( const MatrixPointer& matrix );
-
-   void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-   template< typename Vector,
-             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
-   bool solve( const Vector& b, Vector& x );
-
-   protected:
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 
+protected:
    void setSize( IndexType size );
 
    Containers::Vector< RealType, DeviceType, IndexType > d, r, w, u, v, r_ast, Au, M_tmp;
 
-   IndexType size;
-
-   MatrixPointer matrix;
-   PreconditionerPointer preconditioner;
+   IndexType size = 0;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/TFQMR_impl.h b/src/TNL/Solvers/Linear/TFQMR_impl.h
index 7f6546a75caa6fe1dc4c09de47a694b46a4f02df..f87d961520fc0bf7142564ad065c5bae49534bf0 100644
--- a/src/TNL/Solvers/Linear/TFQMR_impl.h
+++ b/src/TNL/Solvers/Linear/TFQMR_impl.h
@@ -10,103 +10,56 @@
 
 #pragma once
 
+#include <cmath>
+
 #include "TFQMR.h"
 
 namespace TNL {
 namespace Solvers {
 namespace Linear {
 
-template< typename Matrix,
-          typename Preconditioner >
-TFQMR< Matrix, Preconditioner > :: TFQMR()
-: size( 0 )
-{
-   /****
-    * Clearing the shared pointer means that there is no
-    * preconditioner set.
-    */
-   this->preconditioner.clear();   
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-String TFQMR< Matrix, Preconditioner > :: getType() const
+template< typename Matrix >
+String TFQMR< Matrix > :: getType() const
 {
    return String( "TFQMR< " ) +
           this->matrix -> getType() + ", " +
           this->preconditioner -> getType() + " >";
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void
-TFQMR< Matrix, Preconditioner >::
-configSetup( Config::ConfigDescription& config,
-             const String& prefix )
+template< typename Matrix >
+bool TFQMR< Matrix >::solve( ConstVectorViewType b, VectorViewType x )
 {
-   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-bool
-TFQMR< Matrix, Preconditioner >::
-setup( const Config::ParameterContainer& parameters,
-       const String& prefix )
-{
-   return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void TFQMR< Matrix, Preconditioner > :: setMatrix( const MatrixPointer& matrix )
-{
-   this->matrix = matrix;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-void TFQMR< Matrix, Preconditioner > :: setPreconditioner( const PreconditionerPointer& preconditioner )
-{
-   this->preconditioner = preconditioner;
-}
-
-template< typename Matrix,
-          typename Preconditioner >
-   template< typename Vector, typename ResidueGetter >
-bool TFQMR< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
-{
-   this->setSize( matrix -> getRows() );
+   this->setSize( this->matrix->getRows() );
 
    RealType tau, theta, eta, rho, alpha, b_norm, w_norm;
 
-   if( preconditioner ) {
-      preconditioner -> solve( b, M_tmp );
+   if( this->preconditioner ) {
+      this->preconditioner->solve( b, M_tmp );
       b_norm = M_tmp. lpNorm( ( RealType ) 2.0 );
 
-      matrix -> vectorProduct( x, M_tmp );
+      this->matrix->vectorProduct( x, M_tmp );
       M_tmp.addVector( b, 1.0, -1.0 );
-      preconditioner -> solve( M_tmp, r );
+      this->preconditioner->solve( M_tmp, r );
    }
    else {
-      b_norm = b. lpNorm( 2.0 );
-      matrix -> vectorProduct( x, r );
+      b_norm = b.lpNorm( 2.0 );
+      this->matrix->vectorProduct( x, r );
       r.addVector( b, 1.0, -1.0 );
    }
    w = u = r;
-   if( preconditioner ) {
-      matrix -> vectorProduct( u, M_tmp );
-      preconditioner -> solve( M_tmp, Au );
+   if( this->preconditioner ) {
+      this->matrix->vectorProduct( u, M_tmp );
+      this->preconditioner->solve( M_tmp, Au );
    }
    else {
-      matrix -> vectorProduct( u, Au );
+      this->matrix->vectorProduct( u, Au );
    }
    v = Au;
-   d. setValue( 0.0 );
-   tau = r. lpNorm( 2.0 );
+   d.setValue( 0.0 );
+   tau = r.lpNorm( 2.0 );
    theta = eta = 0.0;
    r_ast = r;
-   rho = r_ast. scalarProduct( r );
+   rho = r_ast.scalarProduct( r );
    // only to avoid compiler warning; alpha is initialized inside the loop
    alpha = 0.0;
 
@@ -126,12 +79,12 @@ bool TFQMR< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
       else {
          // not necessary in odd iter since the previous iteration
          // already computed v_{m+1} = A*u_{m+1}
-         if( preconditioner ) {
-            matrix -> vectorProduct( u, M_tmp );
-            preconditioner -> solve( M_tmp, Au );
+         if( this->preconditioner ) {
+            this->matrix->vectorProduct( u, M_tmp );
+            this->preconditioner->solve( M_tmp, Au );
          }
          else {
-            matrix -> vectorProduct( u, Au );
+            this->matrix->vectorProduct( u, Au );
          }
       }
       w.addVector( Au, -alpha );
@@ -149,25 +102,25 @@ bool TFQMR< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
       }
 
       if( iter % 2 == 0 ) {
-         const RealType rho_new  = w. scalarProduct( this->r_ast );
+         const RealType rho_new  = w.scalarProduct( this->r_ast );
          const RealType beta = rho_new / rho;
          rho = rho_new;
 
          u.addVector( w, 1.0, beta );
          v.addVector( Au, beta, beta * beta );
-         if( preconditioner ) {
-            matrix -> vectorProduct( u, M_tmp );
-            preconditioner -> solve( M_tmp, Au );
+         if( this->preconditioner ) {
+            this->matrix->vectorProduct( u, M_tmp );
+            this->preconditioner->solve( M_tmp, Au );
          }
          else {
-            matrix -> vectorProduct( u, Au );
+            this->matrix->vectorProduct( u, Au );
          }
          v.addVector( Au, 1.0 );
       }
       else {
          u.addVector( v, -alpha );
       }
- 
+
       this->refreshSolverMonitor();
    }
 
@@ -175,9 +128,8 @@ bool TFQMR< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
    return this->checkConvergence();
 }
 
-template< typename Matrix,
-          typename Preconditioner >
-void TFQMR< Matrix, Preconditioner > :: setSize( IndexType size )
+template< typename Matrix >
+void TFQMR< Matrix > :: setSize( IndexType size )
 {
    if( this->size == size )
       return;
diff --git a/src/TNL/Solvers/Linear/UmfpackWrapper.h b/src/TNL/Solvers/Linear/UmfpackWrapper.h
index 972e33e6ec875120078f46719ffef5d2c714c5af..1d4e67ea2771e6d389eb4e84ca199423d927fd16 100644
--- a/src/TNL/Solvers/Linear/UmfpackWrapper.h
+++ b/src/TNL/Solvers/Linear/UmfpackWrapper.h
@@ -16,17 +16,14 @@
 
 #include <umfpack.h>
 
-#include <TNL/Object.h>
-#include <TNL/Config/ConfigDescription.h>
+#include "LinearSolver.h"
+
 #include <TNL/Matrices/CSR.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/IterativeSolver.h>
-#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 
 
 namespace TNL {
 namespace Solvers {
-namespace Linear {   
+namespace Linear {
 
 template< typename Matrix >
 struct is_csr_matrix
@@ -41,99 +38,52 @@ struct is_csr_matrix< Matrices::CSR< Real, Device, Index > >
 };
 
 
-template< typename Matrix,
-          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
-                                                            typename Matrix :: DeviceType,
-                                                            typename Matrix :: IndexType> >
+template< typename Matrix >
 class UmfpackWrapper
-    : public Object,
-      // just to ensure the same interface as other linear solvers
-      public IterativeSolver< typename Matrix::RealType,
-                              typename Matrix::IndexType >
+: public LinearSolver< Matrix >
 {
+   using Base = LinearSolver< Matrix >;
 public:
-    typedef typename Matrix :: RealType RealType;
-    typedef typename Matrix :: IndexType IndexType;
-    typedef typename Matrix :: DeviceType DeviceType;
-    typedef Matrix MatrixType;
-    typedef Preconditioner PreconditionerType;
-    typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-    typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
-    UmfpackWrapper()
-    {
-        if( ! is_csr_matrix< Matrix >::value )
-            std::cerr << "The UmfpackWrapper solver is available only for CSR matrices." << std::endl;
-        if( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value )
-            std::cerr << "The UmfpackWrapper solver is not available on CUDA." << std::endl;
-        if( ! std::is_same< RealType, double >::value )
-            std::cerr << "The UmfpackWrapper solver is available only for double precision." << std::endl;
-        if( ! std::is_same< IndexType, int >::value )
-            std::cerr << "The UmfpackWrapper solver is available only for 'int' index type." << std::endl;
-    }
-
-    static void configSetup( Config::ConfigDescription& config,
-                             const String& prefix = "" )
-    {}
-
-    bool setup( const Config::ParameterContainer& parameters,
-                const String& prefix = "" )
-    {
-        return false;
-    }
-
-    void setMatrix( const MatrixPointer& matrix )
-    {}
-
-    void setPreconditioner( const PreconditionerPointer& preconditioner )
-    {}
-
-    template< typename Vector,
-              typename ResidueGetter = LinearResidueGetter< MatrixType, Vector > >
-    bool solve( const Vector& b, Vector& x )
-    {
-        return false;
-    }
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
+
+   UmfpackWrapper()
+   {
+      if( ! is_csr_matrix< Matrix >::value )
+         std::cerr << "The UmfpackWrapper solver is available only for CSR matrices." << std::endl;
+      if( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value )
+         std::cerr << "The UmfpackWrapper solver is not available on CUDA." << std::endl;
+      if( ! std::is_same< RealType, double >::value )
+         std::cerr << "The UmfpackWrapper solver is available only for double precision." << std::endl;
+      if( ! std::is_same< IndexType, int >::value )
+         std::cerr << "The UmfpackWrapper solver is available only for 'int' index type." << std::endl;
+   }
+
+   bool solve( ConstVectorViewType b, VectorViewType x ) override
+   {
+       return false;
+   }
 };
 
 
-template< typename Preconditioner >
-class UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >
-    : public Object,
-      // just to ensure the same interface as other linear solvers
-      public IterativeSolver< double, int >
+template<>
+class UmfpackWrapper< Matrices::CSR< double, Devices::Host, int > >
+: public LinearSolver< Matrices::CSR< double, Devices::Host, int > >
 {
+   using Base = LinearSolver< Matrices::CSR< double, Devices::Host, int > >;
 public:
-    typedef double RealType;
-    typedef int IndexType;
-    typedef Devices::Host DeviceType;
-    typedef Matrices::CSR< double, Devices::Host, int > MatrixType;
-    typedef Preconditioner PreconditionerType;
-    typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
-    typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
-
-    UmfpackWrapper();
-
-    String getType() const;
-
-    static void configSetup( Config::ConfigDescription& config,
-                             const String& prefix = "" );
-
-    bool setup( const Config::ParameterContainer& parameters,
-               const String& prefix = "" );
-
-    void setMatrix( const MatrixPointer& matrix );
-
-    void setPreconditioner( const PreconditionerPointer& preconditioner );
-
-    template< typename Vector,
-              typename ResidueGetter = LinearResidueGetter< MatrixType, Vector > >
-    bool solve( const Vector& b, Vector& x );
+   using RealType = typename Base::RealType;
+   using DeviceType = typename Base::DeviceType;
+   using IndexType = typename Base::IndexType;
+   using VectorViewType = typename Base::VectorViewType;
+   using ConstVectorViewType = typename Base::ConstVectorViewType;
 
-protected:
-   MatrixPointer matrix;
+   String getType() const;
 
-   PreconditionerPointer preconditioner;
+   bool solve( ConstVectorViewType b, VectorViewType x ) override;
 };
 
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h b/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h
index ebc401d633e9e2792bcff1b16ebcb9bc88646a09..1a86290998961717cb274c5948a353299ed02df8 100644
--- a/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h
+++ b/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h
@@ -16,58 +16,20 @@
 
 #include "UmfpackWrapper.h"
 
+#include <TNL/Solvers/Linear/LinearResidueGetter.h>
+
 namespace TNL {
 namespace Solvers {
-namespace Linear {   
-
-template< typename Preconditioner >
-UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >::
-UmfpackWrapper()
-{}
-
-template< typename Preconditioner >
-void
-UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >::
-configSetup( Config::ConfigDescription& config,
-             const String& prefix )
-{
-}
-
-template< typename Preconditioner >
-bool
-UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >::
-setup( const Config::ParameterContainer& parameters,
-       const String& prefix )
-{
-    return true;    
-}
-
-template< typename Preconditioner >
-void UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >::
-setMatrix( const MatrixPointer& matrix )
-{
-    this -> matrix = matrix;
-}
+namespace Linear {
 
-template< typename Preconditioner >
-void UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >::
-setPreconditioner( const PreconditionerPointer& preconditioner )
+bool UmfpackWrapper< Matrices::CSR< double, Devices::Host, int > >::
+solve( ConstVectorViewType b, VectorViewType x )
 {
-    this -> preconditioner = preconditioner;
-}
+    TNL_ASSERT_EQ( this->matrix->getRows(), this->matrix->getColumns(), "matrix must be square" );
+    TNL_ASSERT_EQ( this->matrix->getColumns(), x.getSize(), "wrong size of the solution vector" );
+    TNL_ASSERT_EQ( this->matrix->getColumns(), b.getSize(), "wrong size of the right hand side" );
 
-
-template< typename Preconditioner >
-    template< typename Vector, typename ResidueGetter >
-bool UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner >::
-solve( const Vector& b,
-       Vector& x )
-{
-    TNL_ASSERT_EQ( matrix->getRows(), matrix->getColumns(), "matrix must be square" );
-    TNL_ASSERT_EQ( matrix->getColumns(), x.getSize(), "wrong size of the solution vector" );
-    TNL_ASSERT_EQ( matrix->getColumns(), b.getSize(), "wrong size of the right hand side" );
-
-    const IndexType size = matrix -> getRows();
+    const IndexType size = this->matrix->getRows();
 
     this->resetIterations();
     this->setResidue( this -> getConvergenceResidue() + 1.0 );
@@ -88,9 +50,9 @@ solve( const Vector& b,
 
     // symbolic reordering of the sparse matrix
     status = umfpack_di_symbolic( size, size,
-                                  matrix->getRowPointers().getData(),
-                                  matrix->getColumnIndexes().getData(),
-                                  matrix->getValues().getData(),
+                                  this->matrix->getRowPointers().getData(),
+                                  this->matrix->getColumnIndexes().getData(),
+                                  this->matrix->getValues().getData(),
                                   &Symbolic, Control, Info );
     if( status != UMFPACK_OK ) {
         std::cerr << "error: symbolic reordering failed" << std::endl;
@@ -98,9 +60,9 @@ solve( const Vector& b,
     }
 
     // numeric factorization
-    status = umfpack_di_numeric( matrix->getRowPointers().getData(),
-                                 matrix->getColumnIndexes().getData(),
-                                 matrix->getValues().getData(),
+    status = umfpack_di_numeric( this->matrix->getRowPointers().getData(),
+                                 this->matrix->getColumnIndexes().getData(),
+                                 this->matrix->getValues().getData(),
                                  Symbolic, &Numeric, Control, Info );
     if( status != UMFPACK_OK ) {
         std::cerr << "error: numeric factorization failed" << std::endl;
@@ -109,9 +71,9 @@ solve( const Vector& b,
 
     // solve with specified right-hand-side
     status = umfpack_di_solve( system_type,
-                               matrix->getRowPointers().getData(),
-                               matrix->getColumnIndexes().getData(),
-                               matrix->getValues().getData(),
+                               this->matrix->getRowPointers().getData(),
+                               this->matrix->getColumnIndexes().getData(),
+                               this->matrix->getValues().getData(),
                                x.getData(),
                                b.getData(),
                                Numeric, Control, Info );
@@ -134,10 +96,10 @@ finished:
     if( Numeric )
         umfpack_di_free_numeric( &Numeric );
 
-    this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
+    this->setResidue( LinearResidueGetter::getResidue( *this->matrix, x, b, bNorm ) );
     this->refreshSolverMonitor( true );
     return status == UMFPACK_OK;
-};
+}
 
 } // namespace Linear
 } // namespace Solvers
diff --git a/src/TNL/Solvers/LinearSolverTypeResolver.h b/src/TNL/Solvers/LinearSolverTypeResolver.h
new file mode 100644
index 0000000000000000000000000000000000000000..8842038009c8a75f38b14799d5f192d495b06843
--- /dev/null
+++ b/src/TNL/Solvers/LinearSolverTypeResolver.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+                          LinearSolverTypeResolver.h  -  description
+                             -------------------
+    begin                : Sep 4, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#include <memory>
+
+#include <TNL/Solvers/Linear/SOR.h>
+#include <TNL/Solvers/Linear/CG.h>
+#include <TNL/Solvers/Linear/BICGStab.h>
+#include <TNL/Solvers/Linear/BICGStabL.h>
+#include <TNL/Solvers/Linear/GMRES.h>
+#include <TNL/Solvers/Linear/CWYGMRES.h>
+#include <TNL/Solvers/Linear/TFQMR.h>
+#include <TNL/Solvers/Linear/UmfpackWrapper.h>
+#include <TNL/Solvers/Linear/Preconditioners/Diagonal.h>
+#include <TNL/Solvers/Linear/Preconditioners/ILU0.h>
+#include <TNL/Solvers/Linear/Preconditioners/ILUT.h>
+
+namespace TNL {
+namespace Solvers {
+
+/**
+ * Creates the linear solver selected by the "discrete-solver" parameter.
+ *
+ * Recognized names: sor, cg, bicgstab, bicgstabl, gmres, cwygmres, tfqmr
+ * and, only when HAVE_UMFPACK is defined, umfpack.
+ *
+ * \param parameters container queried for the "discrete-solver" string
+ * \return default-constructed solver for the given name, or nullptr (after
+ *         reporting the unknown name on std::cerr) when no name matches
+ */
+template< typename MatrixType >
+std::shared_ptr< Linear::LinearSolver< MatrixType > >
+getLinearSolver( const Config::ParameterContainer& parameters )
+{
+   const String& discreteSolver = parameters.getParameter< String>( "discrete-solver" );
+
+   if( discreteSolver == "sor" )
+      return std::make_shared< Linear::SOR< MatrixType > >();
+   if( discreteSolver == "cg" )
+      return std::make_shared< Linear::CG< MatrixType > >();
+   if( discreteSolver == "bicgstab" )
+      return std::make_shared< Linear::BICGStab< MatrixType > >();
+   if( discreteSolver == "bicgstabl" )
+      return std::make_shared< Linear::BICGStabL< MatrixType > >();
+   if( discreteSolver == "gmres" )
+      return std::make_shared< Linear::GMRES< MatrixType > >();
+   if( discreteSolver == "cwygmres" )
+      return std::make_shared< Linear::CWYGMRES< MatrixType > >();
+   if( discreteSolver == "tfqmr" )
+      return std::make_shared< Linear::TFQMR< MatrixType > >();
+#ifdef HAVE_UMFPACK
+   if( discreteSolver == "umfpack" )
+      return std::make_shared< Linear::UmfpackWrapper< MatrixType > >();
+#endif
+
+   std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, bicgstabl, gmres, cwygmres, tfqmr";
+#ifdef HAVE_UMFPACK
+   std::cerr << ", umfpack";
+#endif
+   std::cerr << "." << std::endl;
+
+   return nullptr;
+}
+
+/**
+ * Creates the preconditioner selected by the "preconditioner" parameter.
+ *
+ * Recognized names: none, diagonal, ilu0, ilut.
+ *
+ * \param parameters container queried for the "preconditioner" string
+ * \return default-constructed preconditioner for the given name; nullptr is
+ *         returned both for "none" and (after reporting on std::cerr) for an
+ *         unknown name, so the two cases are indistinguishable to the caller
+ */
+template< typename MatrixType >
+std::shared_ptr< Linear::Preconditioners::Preconditioner< MatrixType > >
+getPreconditioner( const Config::ParameterContainer& parameters )
+{
+   const String& preconditioner = parameters.getParameter< String>( "preconditioner" );
+
+   if( preconditioner == "none" )
+      return nullptr;
+   if( preconditioner == "diagonal" )
+      return std::make_shared< Linear::Preconditioners::Diagonal< MatrixType > >();
+   if( preconditioner == "ilu0" )
+      return std::make_shared< Linear::Preconditioners::ILU0< MatrixType > >();
+   if( preconditioner == "ilut" )
+      return std::make_shared< Linear::Preconditioners::ILUT< MatrixType > >();
+
+   std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal, ilu0, ilut." << std::endl;
+   return nullptr;
+}
+
+} // namespace Solvers
+} // namespace TNL
diff --git a/src/TNL/Solvers/ODE/Euler.h b/src/TNL/Solvers/ODE/Euler.h
index 24942b098b32bdacb946683653d619fc740cf6db..5971fb8c040d261d8a8fde62b08392ed30d7882a 100644
--- a/src/TNL/Solvers/ODE/Euler.h
+++ b/src/TNL/Solvers/ODE/Euler.h
@@ -30,12 +30,12 @@ class Euler : public ExplicitSolver< Problem >
    typedef typename Problem :: RealType RealType;
    typedef typename Problem :: DeviceType DeviceType;
    typedef typename Problem :: IndexType IndexType;
-   typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
+   typedef Pointers::SharedPointer<  DofVectorType, DeviceType > DofVectorPointer;
 
 
    Euler();
 
-   String getType() const;
+   static String getType();
 
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
diff --git a/src/TNL/Solvers/ODE/Euler_impl.h b/src/TNL/Solvers/ODE/Euler_impl.h
index 26f32f44ffff76e99283b07c7c2b1490e5325c9d..debfeb7c204b77c97d694d456ad048ff700ce303 100644
--- a/src/TNL/Solvers/ODE/Euler_impl.h
+++ b/src/TNL/Solvers/ODE/Euler_impl.h
@@ -34,7 +34,7 @@ Euler< Problem > :: Euler()
 };
 
 template< typename Problem >
-String Euler< Problem > :: getType() const
+String Euler< Problem > :: getType()
 {
    return String( "Euler< " ) +
           Problem :: getType() +
@@ -127,6 +127,7 @@ bool Euler< Problem > :: solve( DofVectorPointer& u )
        */
       if( currentTau + time == this -> stopTime ) this->setResidue( lastResidue );
       time += currentTau;
+      this->problem->applyBoundaryConditions( time, u );
 
       if( ! this->nextIteration() )
          return this->checkConvergence();
diff --git a/src/TNL/Solvers/ODE/ExplicitSolver.h b/src/TNL/Solvers/ODE/ExplicitSolver.h
index 7ac4f14ecd4bc1eb0ae827a3f67dfd2de4dd1628..4e20911de02879126c3b4360167cea39e5ae6343 100644
--- a/src/TNL/Solvers/ODE/ExplicitSolver.h
+++ b/src/TNL/Solvers/ODE/ExplicitSolver.h
@@ -18,7 +18,7 @@
 #include <TNL/Solvers/IterativeSolver.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -27,7 +27,7 @@ namespace ODE {
 
 template< class Problem >
 class ExplicitSolver : public IterativeSolver< typename Problem::RealType,
-                                                     typename Problem::IndexType >
+                                               typename Problem::IndexType >
 {
    public:
  
@@ -36,7 +36,7 @@ class ExplicitSolver : public IterativeSolver< typename Problem::RealType,
    typedef typename Problem :: RealType RealType;
    typedef typename Problem :: DeviceType DeviceType;
    typedef typename Problem :: IndexType IndexType;
-   typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
+   typedef Pointers::SharedPointer<  DofVectorType, DeviceType > DofVectorPointer;
    typedef IterativeSolverMonitor< RealType, IndexType > SolverMonitorType;
 
    ExplicitSolver();
diff --git a/src/TNL/Solvers/ODE/Merson.h b/src/TNL/Solvers/ODE/Merson.h
index b4570ce2e96d729ce4a9c9b90184f73498e705ae..d39cf169fa3ddce131b3884ef00a250dc63d457e 100644
--- a/src/TNL/Solvers/ODE/Merson.h
+++ b/src/TNL/Solvers/ODE/Merson.h
@@ -28,11 +28,11 @@ class Merson : public ExplicitSolver< Problem >
    typedef typename Problem :: RealType RealType;
    typedef typename Problem :: DeviceType DeviceType;
    typedef typename Problem :: IndexType IndexType;
-   typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
+   typedef Pointers::SharedPointer<  DofVectorType, DeviceType > DofVectorPointer;
 
    Merson();
 
-   String getType() const;
+   static String getType();
 
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
@@ -58,8 +58,9 @@ class Merson : public ExplicitSolver< Problem >
 
    RealType computeError( const RealType tau );
 
-   void computeNewTimeLevel( DofVectorPointer& u,
-                             RealType tau,
+   void computeNewTimeLevel( const RealType time,
+                             const RealType tau,
+                             DofVectorPointer& u,
                              RealType& currentResidue );
 
    void writeGrids( const DofVectorPointer& u );
diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h
index 1775c0fdad41eec0e160748d68afb5ada0d0487b..53ccc9fd2d2f34aca2d53dbce957ee707756ed96 100644
--- a/src/TNL/Solvers/ODE/Merson_impl.h
+++ b/src/TNL/Solvers/ODE/Merson_impl.h
@@ -96,10 +96,10 @@ Merson< Problem > :: Merson()
 };
 
 template< typename Problem >
-String Merson< Problem > :: getType() const
+String Merson< Problem > :: getType()
 {
    return String( "Merson< " ) +
-          Problem :: getType() +
+          Problem::getType() +
           String( " >" );
 };
 
@@ -193,15 +193,16 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
       {
          RealType lastResidue = this->getResidue();
          RealType newResidue( 0.0 );
-         computeNewTimeLevel( u, currentTau, newResidue );
+         time += currentTau;
+         computeNewTimeLevel( time, currentTau, u, newResidue );
          this->setResidue( newResidue );
  
          /****
           * When time is close to stopTime the new residue
           * may be inaccurate significantly.
           */
-         if( currentTau + time == this->stopTime ) this->setResidue( lastResidue );
-         time += currentTau;
+         if( abs( time - this->stopTime ) < 1.0e-7 ) this->setResidue( lastResidue );
+         
 
          if( ! this->nextIteration() )
             return false;
@@ -243,8 +244,8 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
 
 template< typename Problem >
 void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
-                                                    const RealType& time,
-                                                    RealType tau )
+                                             const RealType& time,
+                                             RealType tau )
 {
    IndexType size = u->getSize();
 
@@ -256,16 +257,6 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
    RealType* _kAux = kAux->getData();
    RealType* _u = u->getData();
 
-   /****
-    * Compute data transfers statistics
-    */
-   k1->touch( 4 );
-   k2->touch( 1 );
-   k3->touch( 2 );
-   k4->touch( 1 );
-   kAux->touch( 4 );
-   u->touch( 4 );
-
    RealType tau_3 = tau / 3.0;
 
    if( std::is_same< DeviceType, Devices::Host >::value )
@@ -277,6 +268,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * ( 1.0 / 3.0 * _k1[ i ] );
+      this->problem->applyBoundaryConditions( time + tau_3, kAux );
       this->problem->getExplicitUpdate( time + tau_3, tau, kAux, k2 );
 
    #ifdef HAVE_OPENMP
@@ -284,6 +276,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * 1.0 / 6.0 * ( _k1[ i ] + _k2[ i ] );
+      this->problem->applyBoundaryConditions( time + tau_3, kAux );
       this->problem->getExplicitUpdate( time + tau_3, tau, kAux, k3 );
 
    #ifdef HAVE_OPENMP
@@ -291,6 +284,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * ( 0.125 * _k1[ i ] + 0.375 * _k3[ i ] );
+      this->problem->applyBoundaryConditions( time + 0.5 * tau, kAux );
       this->problem->getExplicitUpdate( time + 0.5 * tau, tau, kAux, k4 );
 
    #ifdef HAVE_OPENMP
@@ -298,6 +292,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * ( 0.5 * _k1[ i ] - 1.5 * _k3[ i ] + 2.0 * _k4[ i ] );
+      this->problem->applyBoundaryConditions( time + tau, kAux );
       this->problem->getExplicitUpdate( time + tau, tau, kAux, k5 );
    }
    if( std::is_same< DeviceType, Devices::Cuda >::value )
@@ -319,6 +314,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
          computeK2Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_kAux[ gridOffset ] );
       }
       cudaThreadSynchronize();
+      this->problem->applyBoundaryConditions( time + tau_3, kAux );
       this->problem->getExplicitUpdate( time + tau_3, tau, kAux, k2 );
       cudaThreadSynchronize();
 
@@ -329,6 +325,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
          computeK3Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_k2[ gridOffset ], &_kAux[ gridOffset ] );
       }
       cudaThreadSynchronize();
+      this->problem->applyBoundaryConditions( time + tau_3, kAux );
       this->problem->getExplicitUpdate( time + tau_3, tau, kAux, k3 );
       cudaThreadSynchronize();
 
@@ -339,6 +336,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
          computeK4Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_k3[ gridOffset ], &_kAux[ gridOffset ] );
       }
       cudaThreadSynchronize();
+      this->problem->applyBoundaryConditions( time + 0.5 * tau, kAux );
       this->problem->getExplicitUpdate( time + 0.5 * tau, tau, kAux, k4 );
       cudaThreadSynchronize();
 
@@ -349,6 +347,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
          computeK5Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_k3[ gridOffset ], &_k4[ gridOffset ], &_kAux[ gridOffset ] );
       }
       cudaThreadSynchronize();
+      this->problem->applyBoundaryConditions( time + tau, kAux );
       this->problem->getExplicitUpdate( time + tau, tau, kAux, k5 );
       cudaThreadSynchronize();
 #endif
@@ -365,14 +364,6 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
    const RealType* _k5 = k5->getData();
    RealType* _kAux = kAux->getData();
 
-   /****
-    * Compute data transfers statistics
-    */
-   k1->touch();
-   k3->touch();
-   k4->touch();
-   k5->touch();
-
    RealType eps( 0.0 ), maxEps( 0.0 );
    if( std::is_same< DeviceType, Devices::Host >::value )
    {
@@ -428,9 +419,10 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
 }
 
 template< typename Problem >
-void Merson< Problem >::computeNewTimeLevel( DofVectorPointer& u,
-                                                      RealType tau,
-                                                      RealType& currentResidue )
+void Merson< Problem >::computeNewTimeLevel( const RealType time,
+                                             const RealType tau,
+                                             DofVectorPointer& u,
+                                             RealType& currentResidue )
 {
    RealType localResidue = RealType( 0.0 );
    IndexType size = k1->getSize();
@@ -439,14 +431,6 @@ void Merson< Problem >::computeNewTimeLevel( DofVectorPointer& u,
    RealType* _k4 = k4->getData();
    RealType* _k5 = k5->getData();
 
-   /****
-    * Compute data transfers statistics
-    */
-   u->touch();
-   k1->touch();
-   k4->touch();
-   k5->touch();
-
    if( std::is_same< DeviceType, Devices::Host >::value )
    {
 #ifdef HAVE_OPENMP
@@ -458,6 +442,7 @@ void Merson< Problem >::computeNewTimeLevel( DofVectorPointer& u,
          _u[ i ] += add;
          localResidue += abs( ( RealType ) add );
       }
+      this->problem->applyBoundaryConditions( time, u );
    }
    if( std::is_same< DeviceType, Devices::Cuda >::value )
    {
@@ -485,6 +470,7 @@ void Merson< Problem >::computeNewTimeLevel( DofVectorPointer& u,
          localResidue += this->cudaBlockResidue.sum();
          cudaThreadSynchronize();
       }
+      this->problem->applyBoundaryConditions( time, u );
 
 #endif
    }
diff --git a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h
index 3bbace4433c33edf526a641076a8b085af965c5b..a28a64cf5567eaee40f5d4efca3fd24af3dd2819 100644
--- a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h
+++ b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h
@@ -13,7 +13,7 @@
 
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Functions/FunctionAdapter.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Meshes/Traverser.h>
 
 namespace TNL {
@@ -57,16 +57,16 @@ class BoundaryConditionsSetter
          RealType,
          MeshFunction,
          BoundaryConditions > TraverserUserData;
-      typedef SharedPointer< MeshType, DeviceType > MeshPointer;
-      typedef SharedPointer< BoundaryConditions, DeviceType > BoundaryConditionsPointer;
-      typedef SharedPointer< MeshFunction, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  MeshType, DeviceType > MeshPointer;
+      typedef Pointers::SharedPointer<  BoundaryConditions, DeviceType > BoundaryConditionsPointer;
+      typedef Pointers::SharedPointer<  MeshFunction, DeviceType > MeshFunctionPointer;
 
       template< typename EntityType = typename MeshType::Cell >
       static void apply( const BoundaryConditionsPointer& boundaryConditions,
                          const RealType& time,
                          MeshFunctionPointer& u )
       {
-         SharedPointer< TraverserUserData, DeviceType >
+         Pointers::SharedPointer<  TraverserUserData, DeviceType >
             userData( time,
                       &boundaryConditions.template getData< DeviceType >(),
                       &u.template modifyData< DeviceType >() );
diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h
index dbcd45e5ba98c4d41a2dac122db96267eb93a67b..b0951390cc1078ff5f564efa4f140e0f9487ff75 100644
--- a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h
+++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h
@@ -14,7 +14,7 @@
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Timer.h>
 #include <TNL/Logger.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Solvers/IterativeSolverMonitor.h>
 
 namespace TNL {
@@ -27,20 +27,23 @@ class ExplicitTimeStepper
 {
    public:
 
-      typedef Problem ProblemType;
-      typedef OdeSolver< ExplicitTimeStepper< Problem, OdeSolver > > OdeSolverType;
-      typedef typename Problem::RealType RealType;
-      typedef typename Problem::DeviceType DeviceType;
-      typedef typename Problem::IndexType IndexType;
-      typedef typename Problem::MeshType MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
-      typedef typename ProblemType::DofVectorType DofVectorType;
-      typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
-      typedef IterativeSolverMonitor< RealType, IndexType > SolverMonitorType;
+      using ProblemType = Problem;
+      using RealType = typename Problem::RealType;
+      using DeviceType = typename Problem::DeviceType;
+      using IndexType = typename Problem::IndexType;
+      using MeshType = typename Problem::MeshType;
+      using MeshPointer = Pointers::SharedPointer< MeshType >;
+      using DofVectorType = typename ProblemType::DofVectorType;
+      using DofVectorPointer = Pointers::SharedPointer< DofVectorType, DeviceType >;
+      using SolverMonitorType = IterativeSolverMonitor< RealType, IndexType >;
       using CommunicatorType = typename Problem::CommunicatorType;
+      using OdeSolverType = OdeSolver< ExplicitTimeStepper< Problem, OdeSolver > >;
+      using OdeSolverPointer = Pointers::SharedPointer< OdeSolverType, DeviceType >;
 
       static_assert( ProblemType::isTimeDependent(), "The problem is not time dependent." );
 
+      static String getType();
+      
       ExplicitTimeStepper();
 
       static void configSetup( Config::ConfigDescription& config,
@@ -72,11 +75,14 @@ class ExplicitTimeStepper
                            DofVectorPointer& _u,
                            DofVectorPointer& _fu );
 
+      void applyBoundaryConditions( const RealType& time,
+                                 DofVectorPointer& _u );
+
       bool writeEpilog( Logger& logger ) const;
 
    protected:
 
-      OdeSolverType* odeSolver;
+      OdeSolverPointer odeSolver;
 
       SolverMonitorType* solverMonitor;
 
diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
index 25056d2aa9115a1b6ed96d1df00ad3f6eb66a4f1..12b804b79c2556a71d3a1c4e855c96f449409f43 100644
--- a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
+++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
@@ -16,12 +16,24 @@ namespace TNL {
 namespace Solvers {
 namespace PDE {   
 
+// Returns the type name "ExplicitTimeStepper< <Problem type>, <ODE solver type> >",
+// composed from Problem::getType() and OdeSolverType::getType().
+template< typename Problem,
+          template < typename OdeProblem > class OdeSolver >
+String
+ExplicitTimeStepper< Problem, OdeSolver >::
+getType()
+{
+   return String( "ExplicitTimeStepper< " ) +
+          Problem::getType() + ", " +
+          OdeSolverType::getType() + " >";
+}
+
 template< typename Problem,
           template < typename OdeProblem > class OdeSolver >
 ExplicitTimeStepper< Problem, OdeSolver >::
 ExplicitTimeStepper()
-: odeSolver( 0 ),
-  problem( 0 ),
+: problem( 0 ),
   timeStep( 0 ),
   allIterations( 0 )
 {
@@ -43,7 +55,7 @@ ExplicitTimeStepper< Problem, OdeSolver >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
-   return true;
+   return this->odeSolver->setup( parameters, prefix );
 }
 
 template< typename Problem,
@@ -130,7 +142,7 @@ solve( const RealType& time,
       this->odeSolver->setMaxTau( ( stopTime - time ) / ( typename OdeSolver< Problem >::RealType ) this->odeSolver->getMinIterations() );
    if( ! this->odeSolver->solve( dofVector ) )
       return false;
-   this->problem->setExplicitBoundaryConditions( stopTime, dofVector );
+   //this->problem->setExplicitBoundaryConditions( stopTime, dofVector );
    mainTimer.stop();
    this->allIterations += this->odeSolver->getIterations();
    return true;
@@ -163,7 +175,7 @@ getExplicitUpdate( const RealType& time,
       this->solverMonitor->setStage( "Explicit update" );
 
    this->explicitUpdaterTimer.start();
-   this->problem->setExplicitBoundaryConditions( time, u );
+   this->problem->applyBoundaryConditions( time, u );
    this->problem->getExplicitUpdate( time, tau, u, fu );
    this->explicitUpdaterTimer.stop();
 
@@ -180,6 +192,17 @@ getExplicitUpdate( const RealType& time,
    this->postIterateTimer.stop();
 }
 
+// Forwards to the problem's applyBoundaryConditions( time, u ).
+template< typename Problem,
+          template < typename OdeProblem > class OdeSolver >
+void
+ExplicitTimeStepper< Problem, OdeSolver >::
+applyBoundaryConditions( const RealType& time,
+                            DofVectorPointer& u )
+{
+   this->problem->applyBoundaryConditions( time, u );
+}
+
 template< typename Problem,
           template < typename OdeProblem > class OdeSolver >
 bool
diff --git a/src/TNL/Solvers/PDE/ExplicitUpdater.h b/src/TNL/Solvers/PDE/ExplicitUpdater.h
index 2d79bec602fa1d234a0460c75d6b4757a5b12337..042c0671413533ee7313d3f26ea5a88477401b2b 100644
--- a/src/TNL/Solvers/PDE/ExplicitUpdater.h
+++ b/src/TNL/Solvers/PDE/ExplicitUpdater.h
@@ -12,12 +12,11 @@
 
 #include <TNL/Functions/FunctionAdapter.h>
 #include <TNL/Timer.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <type_traits>
 #include <TNL/Meshes/GridDetails/Traverser_Grid1D.h>
 #include <TNL/Meshes/GridDetails/Traverser_Grid2D.h>
 #include <TNL/Meshes/GridDetails/Traverser_Grid3D.h>
-#include <TNL/Solvers/PDE/ExplicitUpdater.h>
 
 #ifdef USE_MPI
     #include <TNL/Meshes/DistributedGridSynchronizer.h>
@@ -83,7 +82,7 @@ class ExplicitUpdater
 {
    public:
       typedef Mesh MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       typedef typename MeshFunction::RealType RealType;
       typedef typename MeshFunction::DeviceType DeviceType;
       typedef typename MeshFunction::IndexType IndexType;
@@ -92,25 +91,25 @@ class ExplicitUpdater
                                                 DifferentialOperator,
                                                 BoundaryConditions,
                                                 RightHandSide > TraverserUserData;
-      typedef SharedPointer< DifferentialOperator, DeviceType > DifferentialOperatorPointer;
-      typedef SharedPointer< BoundaryConditions, DeviceType > BoundaryConditionsPointer;
-      typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
-      typedef SharedPointer< MeshFunction, DeviceType > MeshFunctionPointer;
-      typedef SharedPointer< TraverserUserData, DeviceType > TraverserUserDataPointer;
+      typedef Pointers::SharedPointer<  DifferentialOperator, DeviceType > DifferentialOperatorPointer;
+      typedef Pointers::SharedPointer<  BoundaryConditions, DeviceType > BoundaryConditionsPointer;
+      typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
+      typedef Pointers::SharedPointer<  MeshFunction, DeviceType > MeshFunctionPointer;
+      typedef Pointers::SharedPointer<  TraverserUserData, DeviceType > TraverserUserDataPointer;
       
       void setDifferentialOperator( const DifferentialOperatorPointer& differentialOperatorPointer )
       {
-         this->userDataPointer->differentialOperator = &differentialOperatorPointer.template getData< DeviceType >();
+         this->userData.differentialOperator = &differentialOperatorPointer.template getData< DeviceType >();
       }
       
       void setBoundaryConditions( const BoundaryConditionsPointer& boundaryConditionsPointer )
       {
-         this->userDataPointer->boundaryConditions = &boundaryConditionsPointer.template getData< DeviceType >();
+         this->userData.boundaryConditions = &boundaryConditionsPointer.template getData< DeviceType >();
       }
       
       void setRightHandSide( const RightHandSidePointer& rightHandSidePointer )
       {
-         this->userDataPointer->rightHandSide = &rightHandSidePointer.template getData< DeviceType >();
+         this->userData.rightHandSide = &rightHandSidePointer.template getData< DeviceType >();
       }
             
       template< typename EntityType,
@@ -126,34 +125,52 @@ class ExplicitUpdater
                                                  typename MeshFunction::DeviceType,
                                                  typename MeshFunction::IndexType > >::value != true,
             "Error: I am getting Vector instead of MeshFunction or similar object. You might forget to bind DofVector into MeshFunction in you method getExplicitUpdate."  );
+         TNL_ASSERT_GT( uPointer->getData().getSize(), 0, "The first MeshFunction in the parameters was not bound." );
+         TNL_ASSERT_GT( fuPointer->getData().getSize(), 0, "The second MeshFunction in the parameters was not bound." );
+
+         TNL_ASSERT_EQ( uPointer->getData().getSize(), meshPointer->template getEntitiesCount< EntityType >(),
+                        "The first MeshFunction in the parameters was not bound properly." );
+         TNL_ASSERT_EQ( fuPointer->getData().getSize(), meshPointer->template getEntitiesCount< EntityType >(),
+                        "The second MeshFunction in the parameters was not bound properly." );
             
-         TNL_ASSERT_TRUE( this->userDataPointer->differentialOperator,
+         TNL_ASSERT_TRUE( this->userData.differentialOperator,
                           "The differential operator is not correctly set-up. Use method setDifferentialOperator() to do it." );
-         TNL_ASSERT_TRUE( this->userDataPointer->boundaryConditions, 
-                          "The boundary conditions are not correctly set-up. Use method setBoundaryCondtions() to do it." );
-         TNL_ASSERT_TRUE( this->userDataPointer->rightHandSide, 
+         TNL_ASSERT_TRUE( this->userData.rightHandSide,
                           "The right-hand side is not correctly set-up. Use method setRightHandSide() to do it." );
-         
-         
-         this->userDataPointer->time = time;
-         this->userDataPointer->u = &uPointer.template modifyData< DeviceType >();
-         this->userDataPointer->fu = &fuPointer.template modifyData< DeviceType >();
+
+         this->userData.time = time;
+         this->userData.u = &uPointer.template modifyData< DeviceType >();
+         this->userData.fu = &fuPointer.template modifyData< DeviceType >();
          Meshes::Traverser< MeshType, EntityType > meshTraverser;
          meshTraverser.template processInteriorEntities< TraverserUserData,
                                                          TraverserInteriorEntitiesProcessor >
                                                        ( meshPointer,
-                                                         userDataPointer );
-         this->userDataPointer->time = time + tau;
+                                                         userData );
+      }
+      
+      // Runs TraverserBoundaryEntitiesProcessor over the boundary entities of the mesh for u at the given time.
+      template< typename EntityType >
+      void applyBoundaryConditions( const MeshPointer& meshPointer,
+                                    const RealType& time,
+                                    MeshFunctionPointer& uPointer )
+      {
+         TNL_ASSERT_TRUE( this->userData.boundaryConditions,
+                          "The boundary conditions are not correctly set-up. Use method setBoundaryConditions() to do it." );
+         // (an address-of check on modifyData() can never fail, so test the size of the bound data instead)
+         TNL_ASSERT_GT( uPointer->getData().getSize(), 0, "The function u is not correctly set-up. It was probably not bound with DOFs." );
+         this->userData.time = time;
+         this->userData.u = &uPointer.template modifyData< DeviceType >();
+         Meshes::Traverser< MeshType, EntityType > meshTraverser;
          meshTraverser.template processBoundaryEntities< TraverserUserData,
                                              TraverserBoundaryEntitiesProcessor >
                                            ( meshPointer,
-                                             userDataPointer );
+                                             userData );
 
-         if(CommunicatorType::isDistributed())
-            fuPointer->template synchronize<CommunicatorType>();
+         // TODO: confirm that no synchronization is needed here; it is believed to be unnecessary.
+         // The call below comes from the method this one was split from; fuPointer is not a parameter here:
+         //    if( CommunicatorType::isDistributed() ) fuPointer->template synchronize< CommunicatorType >();
 
       }
-      
          
       class TraverserBoundaryEntitiesProcessor
       {
@@ -183,29 +200,17 @@ class ExplicitUpdater
                                               TraverserUserData& userData,
                                               const EntityType& entity )
             {
-           /*    std::cerr<<"===========================================================" << std::endl; 
-               std::cerr<<"fu:" << userData.fu << std::endl; 
-               std::cerr<< "diffOp:" << userData.differentialOperator << std::endl; 
-               std::cerr<<"===========================================================" << std::endl; 
-               
-               std::cerr<<std::flush;*/
-               
-            //   int blabla;
-             //  std::cin >> blabla; 
-               
-               ( *userData.fu )( entity ) = 
-                       ( *userData.differentialOperator )( *userData.u, entity, userData.time );
-            
                typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter;
-               (  *userData.fu )( entity ) += 
-                  FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time );
+               ( *userData.fu )( entity ) = 
+                  ( *userData.differentialOperator )( *userData.u, entity, userData.time )
+                  + FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time );
                
             }
       }; 
 
    protected:
 
-      TraverserUserDataPointer userDataPointer;
+      TraverserUserData userData;
 
 };
 
diff --git a/src/TNL/Solvers/PDE/LinearSystemAssembler.h b/src/TNL/Solvers/PDE/LinearSystemAssembler.h
index d97dd15ef530ec33833d016d4ed321a83d711df6..b47ed34d7879b01b45658b0c792a3b9a9f66c5f8 100644
--- a/src/TNL/Solvers/PDE/LinearSystemAssembler.h
+++ b/src/TNL/Solvers/PDE/LinearSystemAssembler.h
@@ -10,8 +10,9 @@
 
 #pragma once
 
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Functions/FunctionAdapter.h>
+#include <TNL/Meshes/Traverser.h>
 
 namespace TNL {
 namespace Solvers {
@@ -77,26 +78,26 @@ class LinearSystemAssembler
                                                    RightHandSide,
                                                    DofVector > TraverserUserData;
 
-   //typedef SharedPointer< Matrix, DeviceType > MatrixPointer;
-   typedef SharedPointer< DifferentialOperator, DeviceType > DifferentialOperatorPointer;
-   typedef SharedPointer< BoundaryConditions, DeviceType > BoundaryConditionsPointer;
-   typedef SharedPointer< RightHandSide, DeviceType > RightHandSidePointer;
-   typedef SharedPointer< MeshFunction, DeviceType > MeshFunctionPointer;
-   typedef SharedPointer< DofVector, DeviceType > DofVectorPointer;
+   //typedef Pointers::SharedPointer<  Matrix, DeviceType > MatrixPointer;
+   typedef Pointers::SharedPointer<  DifferentialOperator, DeviceType > DifferentialOperatorPointer;
+   typedef Pointers::SharedPointer<  BoundaryConditions, DeviceType > BoundaryConditionsPointer;
+   typedef Pointers::SharedPointer<  RightHandSide, DeviceType > RightHandSidePointer;
+   typedef Pointers::SharedPointer<  MeshFunction, DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer<  DofVector, DeviceType > DofVectorPointer;
    
    void setDifferentialOperator( const DifferentialOperatorPointer& differentialOperatorPointer )
    {
-      this->userDataPointer->differentialOperator = &differentialOperatorPointer.template getData< DeviceType >();
+      this->userData.differentialOperator = &differentialOperatorPointer.template getData< DeviceType >();
    }
 
    void setBoundaryConditions( const BoundaryConditionsPointer& boundaryConditionsPointer )
    {
-      this->userDataPointer->boundaryConditions = &boundaryConditionsPointer.template getData< DeviceType >();
+      this->userData.boundaryConditions = &boundaryConditionsPointer.template getData< DeviceType >();
    }
 
    void setRightHandSide( const RightHandSidePointer& rightHandSidePointer )
    {
-      this->userDataPointer->rightHandSide = &rightHandSidePointer.template getData< DeviceType >();
+      this->userData.rightHandSide = &rightHandSidePointer.template getData< DeviceType >();
    }
    
    template< typename EntityType, typename Matrix >
@@ -104,7 +105,7 @@ class LinearSystemAssembler
                   const RealType& tau,
                   const MeshPointer& meshPointer,
                   const MeshFunctionPointer& uPointer,
-                  SharedPointer< Matrix >& matrixPointer,
+                  Pointers::SharedPointer<  Matrix >& matrixPointer,
                   DofVectorPointer& bPointer )
    {
       static_assert( std::is_same< MeshFunction,
@@ -115,20 +116,20 @@ class LinearSystemAssembler
 
       const IndexType maxRowLength = matrixPointer.template getData< Devices::Host >().getMaxRowLength();
       TNL_ASSERT_GT( maxRowLength, 0, "maximum row length must be positive" );
-      this->userDataPointer->time = time;
-      this->userDataPointer->tau = tau;
-      this->userDataPointer->u = &uPointer.template getData< DeviceType >();
-      this->userDataPointer->matrix = ( void* ) &matrixPointer.template modifyData< DeviceType >();
-      this->userDataPointer->b = &bPointer.template modifyData< DeviceType >();
+      this->userData.time = time;
+      this->userData.tau = tau;
+      this->userData.u = &uPointer.template getData< DeviceType >();
+      this->userData.matrix = ( void* ) &matrixPointer.template modifyData< DeviceType >();
+      this->userData.b = &bPointer.template modifyData< DeviceType >();
       Meshes::Traverser< MeshType, EntityType > meshTraverser;
       meshTraverser.template processBoundaryEntities< TraverserUserData,
                                                       TraverserBoundaryEntitiesProcessor< Matrix> >
                                                     ( meshPointer,
-                                                      userDataPointer );
+                                                      userData );
       meshTraverser.template processInteriorEntities< TraverserUserData,
                                                       TraverserInteriorEntitiesProcessor< Matrix > >
                                                     ( meshPointer,
-                                                      userDataPointer );
+                                                      userData );
       
    }
 
@@ -190,7 +191,7 @@ class LinearSystemAssembler
    };
 
 protected:
-   SharedPointer< TraverserUserData, DeviceType > userDataPointer;
+   TraverserUserData userData;
 };
 
 } // namespace PDE
diff --git a/src/TNL/Solvers/PDE/PDESolverTypeResolver.h b/src/TNL/Solvers/PDE/PDESolverTypeResolver.h
index bf82fce66445ddefe5f7a2476c2eba60e6626eda..94e4fc7aebd3009b3238bb1b4b4b6f6bef011204 100644
--- a/src/TNL/Solvers/PDE/PDESolverTypeResolver.h
+++ b/src/TNL/Solvers/PDE/PDESolverTypeResolver.h
@@ -15,38 +15,33 @@
 
 namespace TNL {
 namespace Solvers {
-namespace PDE { 
-   
+namespace PDE {
+
 template< typename Problem,
-          typename DiscreteSolver,
           typename TimeStepper,
           bool TimeDependent = Problem::isTimeDependent() >
 class PDESolverTypeResolver
 {
 };
-  
+
 template< typename Problem,
-          typename DiscreteSolver,
           typename TimeStepper >
-class PDESolverTypeResolver< Problem, DiscreteSolver, TimeStepper, true >
+class PDESolverTypeResolver< Problem, TimeStepper, true >
 {
    public:
-      
-      using SolverType = TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >;
+
+      using SolverType = TimeDependentPDESolver< Problem, TimeStepper >;
 };
 
 template< typename Problem,
-          typename DiscreteSolver,
           typename TimeStepper >
-class PDESolverTypeResolver< Problem, DiscreteSolver, TimeStepper, false >
+class PDESolverTypeResolver< Problem, TimeStepper, false >
 {
    public:
-      
-      using SolverType = TimeIndependentPDESolver< Problem, DiscreteSolver >;
+
+      using SolverType = TimeIndependentPDESolver< Problem >;
 };
-   
- 
+
 } // namespace PDE
 } // namespace Solvers
 } // namespace TNL
-
diff --git a/src/TNL/Solvers/PDE/SemiImplicitTimeStepper.h b/src/TNL/Solvers/PDE/SemiImplicitTimeStepper.h
index 8da844b186b3b0540eab034b8c3f461f6fe0eaf2..9fbd9b3c65fef58fe00b7bbe1dc5ae7a7495fa92 100644
--- a/src/TNL/Solvers/PDE/SemiImplicitTimeStepper.h
+++ b/src/TNL/Solvers/PDE/SemiImplicitTimeStepper.h
@@ -10,17 +10,19 @@
 
 #pragma once
 
+#include <memory>  // std::shared_ptr
+
 #include <TNL/Timer.h>
 #include <TNL/Logger.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Solvers/IterativeSolverMonitor.h>
+#include <TNL/Solvers/Linear/LinearSolver.h>
 
 namespace TNL {
 namespace Solvers {
-namespace PDE {   
+namespace PDE {
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 class SemiImplicitTimeStepper
 {
    public:
@@ -31,16 +33,16 @@ class SemiImplicitTimeStepper
    typedef typename Problem::IndexType IndexType;
    typedef typename Problem::MeshType MeshType;
    typedef typename Problem::MeshPointer MeshPointer;
-   typedef typename ProblemType::DofVectorType DofVectorType;   
-   typedef LinearSystemSolver LinearSystemSolverType;
-   typedef typename LinearSystemSolverType::PreconditionerType PreconditionerType;
+   typedef typename ProblemType::DofVectorType DofVectorType;
    typedef typename ProblemType::MatrixType MatrixType;
-   typedef SharedPointer< MatrixType, DeviceType > MatrixPointer;
-   typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
-   typedef SharedPointer< PreconditionerType, DeviceType > PreconditionerPointer;
+   typedef Pointers::SharedPointer< MatrixType, DeviceType > MatrixPointer;
+   typedef Pointers::SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
    typedef IterativeSolverMonitor< RealType, IndexType > SolverMonitorType;
 
-   SemiImplicitTimeStepper();
+   using LinearSolverType = Linear::LinearSolver< MatrixType >;
+   using LinearSolverPointer = std::shared_ptr< LinearSolverType >;
+   using PreconditionerType = typename LinearSolverType::PreconditionerType;
+   using PreconditionerPointer = std::shared_ptr< PreconditionerType >;
 
    static void configSetup( Config::ConfigDescription& config,
                             const String& prefix = "" );
@@ -54,12 +56,8 @@ class SemiImplicitTimeStepper
 
    ProblemType* getProblem() const;
 
-   void setSolver( LinearSystemSolver& linearSystemSolver );
-
    void setSolverMonitor( SolverMonitorType& solverMonitor );
 
-   LinearSystemSolverType* getSolver() const;
-
    bool setTimeStep( const RealType& timeStep );
 
    const RealType& getTimeStep() const;
@@ -72,23 +70,25 @@ class SemiImplicitTimeStepper
 
    protected:
 
-   Problem* problem;
+   // raw pointers with setters
+   Problem* problem = nullptr;
+   SolverMonitorType* solverMonitor = nullptr;
 
+   // smart pointers initialized to the default-created objects
    MatrixPointer matrix;
-
    DofVectorPointer rightHandSidePointer;
 
-   LinearSystemSolver* linearSystemSolver;
-
-   SolverMonitorType* solverMonitor;
+   // uninitialized smart pointers (they are initialized in the setup method)
+   LinearSolverPointer linearSystemSolver = nullptr;
+   PreconditionerPointer preconditioner = nullptr;
 
-   RealType timeStep;
+   RealType timeStep = 0.0;
 
    Timer preIterateTimer, linearSystemAssemblerTimer, preconditionerUpdateTimer, linearSystemSolverTimer, postIterateTimer;
- 
-   bool verbose;
- 
-   long long int allIterations;
+
+   bool verbose = false;
+
+   long long int allIterations = 0;
 };
 
 } // namespace PDE
@@ -96,4 +96,3 @@ class SemiImplicitTimeStepper
 } // namespace TNL
 
 #include <TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h>
-
diff --git a/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h
index 80393407bd50bc56f697395b3c94e6803d4e528d..68a1163b696d899b1ba6ce20d784d8697337376d 100644
--- a/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h
+++ b/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h
@@ -12,54 +12,56 @@
 
 #include <TNL/Math.h>
 #include <TNL/Solvers/PDE/SemiImplicitTimeStepper.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
+#include <TNL/Solvers/LinearSolverTypeResolver.h>
 
 namespace TNL {
 namespace Solvers {
-namespace PDE {   
-
-template< typename Problem,
-          typename LinearSystemSolver >
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
-SemiImplicitTimeStepper()
-: problem( 0 ),
-  linearSystemSolver( 0 ),
-  timeStep( 0 ),
-  allIterations( 0 )
-{
-};
+namespace PDE {
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 void
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
    config.addEntry< bool >( "verbose", "Verbose mode.", true );
 }
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 bool
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 setup( const Config::ParameterContainer& parameters,
-      const String& prefix )
+       const String& prefix )
 {
    this->verbose = parameters.getParameter< bool >( "verbose" );
+
+   // set up the linear solver
+   linearSystemSolver = getLinearSolver< MatrixType >( parameters );
+   if( ! linearSystemSolver )
+      return false;
+   if( ! linearSystemSolver->setup( parameters ) )
+      return false;
+
+   // set up the preconditioner
+   preconditioner = getPreconditioner< MatrixType >( parameters );
+   if( preconditioner ) {
+      linearSystemSolver->setPreconditioner( preconditioner );
+      if( ! preconditioner->setup( parameters ) )
+         return false;
+   }
+
    return true;
 }
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 bool
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 init( const MeshPointer& mesh )
 {
-  std::cout << "Setting up the linear system...";
-   if( ! this->problem->setupLinearSystem( this->matrix ) )
+   if( ! this->problem->setupLinearSystem( this->matrix ) ) {
+      std::cerr << "Failed to set up the linear system." << std::endl;
       return false;
-   std::cout << " [ OK ]" << std::endl;
+   }
    if( this->matrix.getData().getRows() == 0 || this->matrix.getData().getColumns() == 0 )
    {
       std::cerr << "The matrix for the semi-implicit time stepping was not set correctly." << std::endl;
@@ -82,37 +84,25 @@ init( const MeshPointer& mesh )
    return true;
 }
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 void
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 setProblem( ProblemType& problem )
 {
    this->problem = &problem;
 };
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 Problem*
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 getProblem() const
 {
     return this->problem;
 };
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 void
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
-setSolver( LinearSystemSolver& linearSystemSolver )
-{
-   this->linearSystemSolver = &linearSystemSolver;
-}
-
-template< typename Problem,
-          typename LinearSystemSolver >
-void
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 setSolverMonitor( SolverMonitorType& solverMonitor )
 {
    this->solverMonitor = &solverMonitor;
@@ -120,19 +110,9 @@ setSolverMonitor( SolverMonitorType& solverMonitor )
       this->linearSystemSolver->setSolverMonitor( solverMonitor );
 }
 
-template< typename Problem,
-          typename LinearSystemSolver >
-LinearSystemSolver*
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
-getSolver() const
-{
-   return this->linearSystemSolver;
-}
-
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 bool
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 setTimeStep( const RealType& timeStep )
 {
    if( timeStep <= 0.0 )
@@ -144,20 +124,19 @@ setTimeStep( const RealType& timeStep )
    return true;
 };
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 bool
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 solve( const RealType& time,
        const RealType& stopTime,
        DofVectorPointer& dofVector )
 {
    TNL_ASSERT_TRUE( this->problem, "problem was not set" );
-   RealType t = time;
+
+   // set the matrix for the linear solver
    this->linearSystemSolver->setMatrix( this->matrix );
-   PreconditionerPointer preconditioner;
-   Linear::Preconditioners::SolverStarterSolverPreconditionerSetter< LinearSystemSolverType, PreconditionerType >
-       ::run( *(this->linearSystemSolver), preconditioner );
+
+   RealType t = time;
 
    // ignore very small steps at the end, most likely caused by truncation errors
    while( stopTime - t > this->timeStep * 1e-6 )
@@ -195,9 +174,12 @@ solve( const RealType& time,
       if( this->solverMonitor )
          this->solverMonitor->setStage( "Solving the linear system" );
 
-      this->preconditionerUpdateTimer.start();
-      preconditioner->update( this->matrix );
-      this->preconditionerUpdateTimer.stop();
+      if( this->preconditioner )
+      {
+         this->preconditionerUpdateTimer.start();
+         preconditioner->update( this->matrix );
+         this->preconditionerUpdateTimer.stop();
+      }
 
       this->linearSystemSolverTimer.start();
       if( ! this->linearSystemSolver->solve( *this->rightHandSidePointer, *dofVector ) )
@@ -229,10 +211,9 @@ solve( const RealType& time,
    return true;
 }
 
-template< typename Problem,
-          typename LinearSystemSolver >
+template< typename Problem >
 bool
-SemiImplicitTimeStepper< Problem, LinearSystemSolver >::
+SemiImplicitTimeStepper< Problem >::
 writeEpilog( Logger& logger ) const
 {
    logger.writeParameter< long long int >( "Iterations count:", this->allIterations );
diff --git a/src/TNL/Solvers/PDE/TimeDependentPDESolver.h b/src/TNL/Solvers/PDE/TimeDependentPDESolver.h
index 5dade15c9a7145dac7b95ea6011d74a39deb4f44..712e288996096ab4f1ff5c2580a073c43965c03c 100644
--- a/src/TNL/Solvers/PDE/TimeDependentPDESolver.h
+++ b/src/TNL/Solvers/PDE/TimeDependentPDESolver.h
@@ -13,7 +13,7 @@
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Logger.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Solvers/PDE/PDESolver.h>
 #include <TNL/Solvers/PDE/MeshDependentTimeSteps.h>
 
@@ -22,10 +22,9 @@
 
 namespace TNL {
 namespace Solvers {
-namespace PDE {   
+namespace PDE {
 
 template< typename Problem,
-          typename DiscreteSolver,
           typename TimeStepper >
 class TimeDependentPDESolver
    : public PDESolver< typename Problem::RealType, 
@@ -44,8 +43,8 @@ class TimeDependentPDESolver
       typedef typename ProblemType::DofVectorType DofVectorType;
       typedef typename ProblemType::CommonDataType CommonDataType;
       typedef typename ProblemType::CommonDataPointer CommonDataPointer;
-      typedef SharedPointer< MeshType, DeviceType > MeshPointer;
-      typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
+      typedef Pointers::SharedPointer< MeshType, DeviceType > MeshPointer;
+      typedef Pointers::SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
       typedef IterativeSolverMonitor< typename Problem::RealType, typename Problem::IndexType > SolverMonitorType;
       
       static_assert( ProblemType::isTimeDependent(), "The problem is not time dependent." );
@@ -94,9 +93,7 @@ class TimeDependentPDESolver
       CommonDataPointer commonDataPointer;
 
       TimeStepper timeStepper;
-      
-      DiscreteSolver discreteSolver;
-      
+
       ProblemType* problem;
 
       RealType initialTime, finalTime, snapshotPeriod, timeStep;
diff --git a/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h b/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h
index c9b25ca6c26c19a72487490095284b5aa3e83591..78c9a6912ddd7fb9651ceab3035873843fab2348 100644
--- a/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h
+++ b/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h
@@ -18,9 +18,8 @@ namespace Solvers {
 namespace PDE {   
 
 template< typename Problem,
-          typename DiscreteSolver,
           typename TimeStepper >
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 TimeDependentPDESolver()
 : problem( 0 ),
   initialTime( 0.0 ),
@@ -32,10 +31,9 @@ TimeDependentPDESolver()
 
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 void
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
@@ -49,10 +47,9 @@ configSetup( Config::ConfigDescription& config,
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
@@ -100,11 +97,11 @@ setup( const Config::ParameterContainer& parameters,
    /***
     * Set-up the initial condition
     */
-   std::cout << "Setting up the initial condition ... ";
    typedef typename Problem :: DofVectorType DofVectorType;
-   if( ! this->problem->setInitialCondition( parameters, this->dofsPointer ) )
+   if( ! this->problem->setInitialCondition( parameters, this->dofsPointer ) ) {  // a failed initial condition aborts the whole setup
+      std::cerr << "Failed to set up the initial condition." << std::endl;
       return false;
-   std::cout << " [ OK ]" << std::endl;
+   }
 
    /****
     * Initialize the time discretisation
@@ -118,27 +115,19 @@ setup( const Config::ParameterContainer& parameters,
    if( ! status )
       return false;
 
-   /****
-    * Set-up the discrete solver
-    */
-   if( ! this->discreteSolver.setup( parameters ) )
-      return false;
-   
    /****
     * Set-up the time stepper
     */
    if( ! this->timeStepper.setup( parameters ) )
       return false;
-   this->timeStepper.setSolver( this->discreteSolver );
    this->timeStepper.setSolverMonitor( *this->solverMonitorPointer );      
    return true;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 writeProlog( Logger& logger,
              const Config::ParameterContainer& parameters )
 {   
@@ -173,40 +162,36 @@ writeProlog( Logger& logger,
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 void
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 setProblem( ProblemType& problem )
 {
    this->problem = &problem;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 void
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 setInitialTime( const RealType& initialTime )
 {
    this->initialTime = initialTime;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 const typename Problem::RealType&
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 getInitialTime() const
 {
    return this->initialTime;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 setFinalTime( const RealType& finalTime )
 {
    if( finalTime <= this->initialTime )
@@ -219,20 +204,18 @@ setFinalTime( const RealType& finalTime )
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 const typename Problem::RealType&
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 getFinalTime() const
 {
    return this->finalTime;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 setSnapshotPeriod( const RealType& period )
 {
    if( period <= 0 )
@@ -245,20 +228,18 @@ setSnapshotPeriod( const RealType& period )
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 const typename Problem::RealType&
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 getSnapshotPeriod() const
 {
    return this->snapshotPeriod;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 setTimeStep( const RealType& timeStep )
 {
    if( timeStep <= 0 )
@@ -271,20 +252,18 @@ setTimeStep( const RealType& timeStep )
 }
  
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 const typename Problem::RealType&
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 getTimeStep() const
 {
    return this->timeStep;
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 solve()
 {
    TNL_ASSERT_TRUE( problem, "No problem was set in PDESolver." );
@@ -343,10 +322,9 @@ solve()
 }
 
 template< typename Problem,
-          typename DiscreteSolver,   
           typename TimeStepper >
 bool
-TimeDependentPDESolver< Problem, DiscreteSolver, TimeStepper >::
+TimeDependentPDESolver< Problem, TimeStepper >::
 writeEpilog( Logger& logger ) const
 {
    return ( this->timeStepper.writeEpilog( logger ) &&
diff --git a/src/TNL/Solvers/PDE/TimeIndependentPDESolver.h b/src/TNL/Solvers/PDE/TimeIndependentPDESolver.h
index 0a5a0c96dcd0f1d04211b47aa91eb8f1842c296c..5711ec82e7ed2379d194cc3ba88eb2f3c5cf4463 100644
--- a/src/TNL/Solvers/PDE/TimeIndependentPDESolver.h
+++ b/src/TNL/Solvers/PDE/TimeIndependentPDESolver.h
@@ -26,11 +26,10 @@
 
 
 namespace TNL {
-namespace Solvers {   
+namespace Solvers {
 namespace PDE {
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 class TimeIndependentPDESolver : public PDESolver< typename Problem::RealType,
                                                    typename Problem::IndexType >
 {
@@ -42,8 +41,8 @@ class TimeIndependentPDESolver : public PDESolver< typename Problem::RealType,
       typedef typename ProblemType::IndexType IndexType;
       typedef typename ProblemType::MeshType MeshType;
       typedef typename ProblemType::DofVectorType DofVectorType;
-      typedef SharedPointer< MeshType, DeviceType > MeshPointer;
-      typedef SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
+      typedef Pointers::SharedPointer< MeshType, DeviceType > MeshPointer;
+      typedef Pointers::SharedPointer< DofVectorType, DeviceType > DofVectorPointer;
       typedef typename ProblemType::CommonDataType CommonDataType;
       typedef typename ProblemType::CommonDataPointer CommonDataPointer;
 
@@ -69,13 +68,11 @@ class TimeIndependentPDESolver : public PDESolver< typename Problem::RealType,
    protected:
 
       MeshPointer mesh;
-      
+
       CommonDataPointer commonDataPointer;
 
       DofVectorPointer dofs;
 
-      DiscreteSolver discreteSolver;
-
       ProblemType* problem;
 };
 
diff --git a/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h b/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h
index 1f2b46b68a6210b23ff5e9459e04bc65c1f671fa..f53a989060e5b34a40f0bd7fe24b9700c15901ad 100644
--- a/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h
+++ b/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h
@@ -24,27 +24,24 @@ namespace Solvers {
 namespace PDE {   
 
 
-template< typename Problem,
-          typename DiscreteSolver >
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+template< typename Problem >
+TimeIndependentPDESolver< Problem >::
 TimeIndependentPDESolver()
 : problem( 0 )
 {
 }
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 void
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+TimeIndependentPDESolver< Problem >::
 configSetup( Config::ConfigDescription& config,
              const String& prefix )
 {
 }
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 bool
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+TimeIndependentPDESolver< Problem >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
 {
@@ -62,6 +59,8 @@ setup( const Config::ParameterContainer& parameters,
    }
    std::cout << " [ OK ] " << std::endl;
    
+   problem->setMesh( this->mesh );  // pass the mesh to the problem before the common data are set up
+
    /****
     * Set-up common data
     */
@@ -102,10 +101,9 @@ setup( const Config::ParameterContainer& parameters,
    return true;
 }
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 bool
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+TimeIndependentPDESolver< Problem >::
 writeProlog( Logger& logger,
              const Config::ParameterContainer& parameters )
 {
@@ -136,19 +134,17 @@ writeProlog( Logger& logger,
    return true;
 }
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 void
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+TimeIndependentPDESolver< Problem >::
 setProblem( ProblemType& problem )
 {
    this->problem = &problem;
 }
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 bool
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+TimeIndependentPDESolver< Problem >::
 solve()
 {
    TNL_ASSERT_TRUE( problem, "No problem was set in tnlPDESolver." );
@@ -164,10 +160,9 @@ solve()
    return true;
 }
 
-template< typename Problem,
-          typename DiscreteSolver >
+template< typename Problem >
 bool
-TimeIndependentPDESolver< Problem, DiscreteSolver >::
+TimeIndependentPDESolver< Problem >::
 writeEpilog( Logger& logger ) const
 {
    return this->problem->writeEpilog( logger );
diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h
index 43a3cf3755cd1fe8e5cec617d9b21ffbbce69508..bc86767e166cfc8d1c9c6728184e282738f68193 100644
--- a/src/TNL/Solvers/SolverConfig_impl.h
+++ b/src/TNL/Solvers/SolverConfig_impl.h
@@ -16,6 +16,18 @@
 #include <TNL/Solvers/DummyProblem.h>
 #include <TNL/Solvers/PDE/ExplicitTimeStepper.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
+#include <TNL/Solvers/Linear/SOR.h>
+#include <TNL/Solvers/Linear/CG.h>
+#include <TNL/Solvers/Linear/BICGStab.h>
+#include <TNL/Solvers/Linear/BICGStabL.h>
+#include <TNL/Solvers/Linear/GMRES.h>
+#include <TNL/Solvers/Linear/CWYGMRES.h>
+#include <TNL/Solvers/Linear/TFQMR.h>
+#include <TNL/Solvers/Linear/UmfpackWrapper.h>
+#include <TNL/Solvers/Linear/Preconditioners/Diagonal.h>
+#include <TNL/Solvers/Linear/Preconditioners/ILU0.h>
+#include <TNL/Solvers/Linear/Preconditioners/ILUT.h>
+#include <TNL/Matrices/CSR.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedGrid.h>
 
 namespace TNL {
@@ -96,7 +108,7 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    config.addDelimiter( " === Time discretisation parameters ==== " );
    typedef PDE::ExplicitTimeStepper< DummyProblemType, ODE::Euler > ExplicitTimeStepper;
    typedef Solvers::DummySolver DiscreteSolver;
-   PDE::TimeDependentPDESolver< DummyProblemType, DiscreteSolver, ExplicitTimeStepper >::configSetup( config );
+   PDE::TimeDependentPDESolver< DummyProblemType, ExplicitTimeStepper >::configSetup( config );
    ExplicitTimeStepper::configSetup( config );
    if( ConfigTagTimeDiscretisation< ConfigTag, ExplicitTimeDiscretisationTag >::enabled ||
        ConfigTagTimeDiscretisation< ConfigTag, SemiImplicitTimeDiscretisationTag >::enabled ||
@@ -120,32 +132,25 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    }
    if( ConfigTagTimeDiscretisation< ConfigTag, SemiImplicitTimeDiscretisationTag >::enabled )
    {
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitCGSolverTag >::enabled )
-         config.addEntryEnum( "cg" );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabSolverTag >::enabled )
-         config.addEntryEnum( "bicgstab" );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabLSolverTag >::enabled )
-         config.addEntryEnum( "bicgstabl" );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitCWYGMRESSolverTag >::enabled )
-         config.addEntryEnum( "cwygmres" );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitGMRESSolverTag >::enabled )
-         config.addEntryEnum( "gmres" );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitTFQMRSolverTag >::enabled )
-         config.addEntryEnum( "tfqmr" );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitSORSolverTag >::enabled )
-         config.addEntryEnum( "sor" );
+      config.addEntryEnum( "cg" );
+      config.addEntryEnum( "bicgstab" );
+      config.addEntryEnum( "bicgstabl" );
+      config.addEntryEnum( "cwygmres" );
+      config.addEntryEnum( "gmres" );
+      config.addEntryEnum( "tfqmr" );
+      config.addEntryEnum( "sor" );
 #ifdef HAVE_UMFPACK
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitUmfpackSolverTag >::enabled )
-         config.addEntryEnum( "umfpack" );
+      config.addEntryEnum( "umfpack" );
 #endif
-   }
-   config.addEntry< String >( "preconditioner", "The preconditioner for the discrete solver:", "none" );
-   config.addEntryEnum( "none" );
-   config.addEntryEnum( "diagonal" );
-// TODO: implement parallel ILU or device-dependent build config tags for preconditioners
+      config.addEntry< String >( "preconditioner", "The preconditioner for the discrete solver:", "none" );
+      config.addEntryEnum( "none" );
+      config.addEntryEnum( "diagonal" );
+   // TODO: implement parallel ILU or device-dependent build config tags for preconditioners
 #ifndef HAVE_CUDA
-   config.addEntryEnum( "ilu0" );
+      config.addEntryEnum( "ilu0" );  // the ILU variants are offered only in builds without CUDA
+      config.addEntryEnum( "ilut" );
 #endif
+   }
    if( ConfigTagTimeDiscretisation< ConfigTag, ExplicitTimeDiscretisationTag >::enabled ||
        ConfigTagTimeDiscretisation< ConfigTag, SemiImplicitTimeDiscretisationTag >::enabled )
    {
@@ -166,23 +171,19 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    {
       config.addDelimiter( " === Semi-implicit solvers parameters === " );
       typedef Matrices::CSR< double, Devices::Host, int > MatrixType;
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitCGSolverTag >::enabled )
-         Linear::CG< MatrixType >::configSetup( config );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabSolverTag >::enabled )
-         Linear::BICGStab< MatrixType >::configSetup( config );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabLSolverTag >::enabled )
-         Linear::BICGStabL< MatrixType >::configSetup( config );
+      Linear::CG< MatrixType >::configSetup( config );
+      Linear::BICGStab< MatrixType >::configSetup( config );
+      Linear::BICGStabL< MatrixType >::configSetup( config );
 
       // GMRES and CWYGMRES have the same options
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitCWYGMRESSolverTag >::enabled )
-         Linear::CWYGMRES< MatrixType >::configSetup( config );
-      else if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitGMRESSolverTag >::enabled )
-         Linear::GMRES< MatrixType >::configSetup( config );
-
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitTFQMRSolverTag >::enabled )
-         Linear::TFQMR< MatrixType >::configSetup( config );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitSORSolverTag >::enabled )
-         Linear::SOR< MatrixType >::configSetup( config );
+      Linear::GMRES< MatrixType >::configSetup( config );  // CWYGMRES is not registered separately; it has the same options
+
+      Linear::TFQMR< MatrixType >::configSetup( config );
+      Linear::SOR< MatrixType >::configSetup( config );
+
+      Linear::Preconditioners::Diagonal< MatrixType >::configSetup( config );  // preconditioner options are registered after the solver options
+      Linear::Preconditioners::ILU0< MatrixType >::configSetup( config );
+      Linear::Preconditioners::ILUT< MatrixType >::configSetup( config );
    }
 
    config.addDelimiter( " === Logs and messages ===" );
diff --git a/src/TNL/Solvers/SolverInitiator_impl.h b/src/TNL/Solvers/SolverInitiator_impl.h
index 38d5f545f9a371ddd8acacd2725ceabffe9bfbb0..c6bc5ca7f494abd8922f1a0fcb45b4814277094f 100644
--- a/src/TNL/Solvers/SolverInitiator_impl.h
+++ b/src/TNL/Solvers/SolverInitiator_impl.h
@@ -16,10 +16,6 @@
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Meshes/TypeResolver/TypeResolver.h>
 #include <TNL/Solvers/BuildConfigTags.h>
-#include <TNL/Solvers/Linear/SOR.h>
-#include <TNL/Solvers/Linear/CG.h>
-#include <TNL/Solvers/Linear/BICGStab.h>
-#include <TNL/Solvers/Linear/GMRES.h>
 #include <TNL/Solvers/SolverStarter.h>
 #include <TNL/Meshes/DummyMesh.h>
 
@@ -73,8 +69,6 @@ template< template< typename Real, typename Device, typename Index, typename Mes
 bool SolverInitiator< ProblemSetter, ConfigTag > :: run( const Config::ParameterContainer& parameters )
 {
    const String& realType = parameters. getParameter< String >( "real-type" );
-   if( parameters. getParameter< int >( "verbose" ) )
-     std::cout << "Setting RealType to   ... " << realType << std::endl;
    if( realType == "float" )
       return SolverInitiatorRealResolver< ProblemSetter, float, ConfigTag >::run( parameters );
    if( realType == "double" )
@@ -94,9 +88,6 @@ class SolverInitiatorRealResolver< ProblemSetter, Real, ConfigTag, true >
       static bool run( const Config::ParameterContainer& parameters )
       {
          const String& device = parameters. getParameter< String >( "device" );
-         if( parameters. getParameter< int >( "verbose" ) )
-           std::cout << "Setting DeviceType to ... " << device << std::endl;
-
          if( device == "host" )
             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Host, ConfigTag >::run( parameters );
          if( device == "cuda" )
@@ -131,8 +122,6 @@ class SolverInitiatorDeviceResolver< ProblemSetter, Real, Device, ConfigTag, tru
       static bool run( const Config::ParameterContainer& parameters )
       {
          const String& indexType = parameters. getParameter< String >( "index-type" );
-         if( parameters. getParameter< int >( "verbose" ) )
-           std::cout << "Setting IndexType to  ... " << indexType << std::endl;
          if( indexType == "short-int" )
             return SolverInitiatorIndexResolver< ProblemSetter, Real, Device, short int, ConfigTag >::run( parameters );
          if( indexType == "int" )
@@ -197,15 +186,9 @@ class CommunicatorTypeResolver< ProblemSetter, Real, Device, Index, ConfigTag, t
    public:
       static bool run( const Config::ParameterContainer& parameters )
       {
-         if(Communicators::MpiCommunicator::isDistributed())
-         {     
-               bool ret=SolverInitiatorMeshResolver< ProblemSetter, Real, Device, Index, ConfigTag, Communicators::MpiCommunicator >::run( parameters );
-               Communicators::MpiCommunicator::Finalize();      
-               return ret;
-         }
-         Communicators::MpiCommunicator::Finalize();
+         if( Communicators::MpiCommunicator::isDistributed() )  // distributed run: resolve the mesh with the MPI communicator
+            return SolverInitiatorMeshResolver< ProblemSetter, Real, Device, Index, ConfigTag, Communicators::MpiCommunicator >::run( parameters );
          return SolverInitiatorMeshResolver< ProblemSetter, Real, Device, Index, ConfigTag, Communicators::NoDistrCommunicator >::run( parameters );
-         
       }
 };
 
diff --git a/src/TNL/Solvers/SolverStarter.h b/src/TNL/Solvers/SolverStarter.h
index aa4604e5f58d11e498366844a534d11d983c736b..0843c93b4bdc4f5db0cc0a95a221d556baf9ad5d 100644
--- a/src/TNL/Solvers/SolverStarter.h
+++ b/src/TNL/Solvers/SolverStarter.h
@@ -31,7 +31,7 @@ class SolverStarter
    template< typename Solver >
    bool writeEpilog( std::ostream& str, const Solver& solver );
 
-   template< typename Problem, typename TimeStepper, typename DiscreteSolver >
+   template< typename Problem, typename TimeStepper >
    bool runPDESolver( Problem& problem,
                       const Config::ParameterContainer& parameters );
 
diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h
index 71a8dfe23ee8b890ec5500bea72633b03a627bf3..f848b774b1252f8abfff24fd3a97d71f3fbae8f3 100644
--- a/src/TNL/Solvers/SolverStarter_impl.h
+++ b/src/TNL/Solvers/SolverStarter_impl.h
@@ -21,20 +21,8 @@
 #include <TNL/Solvers/BuildConfigTags.h>
 #include <TNL/Solvers/ODE/Merson.h>
 #include <TNL/Solvers/ODE/Euler.h>
-#include <TNL/Solvers/Linear/SOR.h>
-#include <TNL/Solvers/Linear/CG.h>
-#include <TNL/Solvers/Linear/BICGStab.h>
-#include <TNL/Solvers/Linear/BICGStabL.h>
-#include <TNL/Solvers/Linear/GMRES.h>
-#include <TNL/Solvers/Linear/CWYGMRES.h>
-#include <TNL/Solvers/Linear/TFQMR.h>
-#include <TNL/Solvers/Linear/UmfpackWrapper.h>
-#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
-#include <TNL/Solvers/Linear/Preconditioners/Diagonal.h>
-#include <TNL/Solvers/Linear/Preconditioners/ILU0.h>
 #include <TNL/Solvers/PDE/ExplicitTimeStepper.h>
 #include <TNL/Solvers/PDE/SemiImplicitTimeStepper.h>
-#include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
 #include <TNL/Solvers/PDE/PDESolverTypeResolver.h>
 
 namespace TNL {
@@ -63,18 +51,6 @@ template< typename Problem,
           bool enabled = ConfigTagExplicitSolver< ConfigTag, ExplicitSolver >::enabled >
 class SolverStarterExplicitSolverSetter{};
 
-template< typename Problem,
-          typename SemiImplicitSolver,
-          template<typename, typename, typename> class Preconditioner,
-          typename ConfigTag,
-          bool enabled = ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitSolver >::enabled >
-class SolverStarterLinearSolverSetter{};
-
-template< typename Problem,
-          typename SemiImplicitSolverTag,
-          typename ConfigTag >
-class SolverStarterPreconditionerSetter;
-
 
 template< typename ConfigTag >
 SolverStarter< ConfigTag > :: SolverStarter()
@@ -96,7 +72,6 @@ bool SolverStarter< ConfigTag > :: run( const Config::ParameterContainer& parame
     )
       return false;
    Problem problem;
-   //return UserDefinedTimeDiscretisationSetter< Problem, ConfigTag >::run( problem, parameters );
    return TimeDependencyResolver< Problem, ConfigTag >::run( problem, parameters );
 }
 
@@ -141,8 +116,7 @@ class UserDefinedTimeDiscretisationSetter
             return false;
          }
          SolverStarter< ConfigTag > solverStarter;
-         // TODO: Solve the set-up of the DiscreteSOlver type in some better way
-         return solverStarter.template runPDESolver< Problem, TimeStepper, typename Problem::DiscreteSolver >( problem, parameters );
+         return solverStarter.template runPDESolver< Problem, TimeStepper >( problem, parameters );
       }
 };
 
@@ -175,7 +149,7 @@ class UserDefinedTimeDiscretisationSetter< Problem, ConfigTag, void >
             {
                std::cerr << "TNL currently does not support implicit solvers with MPI." << std::endl;
                return false;
-            }            
+            }
             return SolverStarterTimeDiscretisationSetter< Problem, ImplicitTimeDiscretisationTag, ConfigTag >::run( problem, parameters );
          }
          std::cerr << "Uknown time discretisation: " << timeDiscretisation << "." << std::endl;
@@ -188,9 +162,9 @@ class UserDefinedTimeDiscretisationSetter< Problem, ConfigTag, void >
  */
 
 template< typename Problem,
-          typename TimeDiscretisation,
+          typename TimeDiscretisationTag,
           typename ConfigTag >
-class SolverStarterTimeDiscretisationSetter< Problem, TimeDiscretisation, ConfigTag, false >
+class SolverStarterTimeDiscretisationSetter< Problem, TimeDiscretisationTag, ConfigTag, false >
 {
    public:
       static bool run( Problem& problem,
@@ -232,53 +206,9 @@ class SolverStarterTimeDiscretisationSetter< Problem, SemiImplicitTimeDiscretisa
       static bool run( Problem& problem,
                        const Config::ParameterContainer& parameters )
       {
-         const String& discreteSolver = parameters. getParameter< String>( "discrete-solver" );
-#ifndef HAVE_UMFPACK
-         if( discreteSolver != "sor" &&
-             discreteSolver != "cg" &&
-             discreteSolver != "bicgstab" &&
-             discreteSolver != "bicgstabl" &&
-             discreteSolver != "gmres" &&
-             discreteSolver != "cwygmres" &&
-             discreteSolver != "tfqmr" )
-         {
-            std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, bicgstabl, gmres, cwygmres or tfqmr." << std::endl;
-            return false;
-         }
-#else
-         if( discreteSolver != "sor" &&
-             discreteSolver != "cg" &&
-             discreteSolver != "bicgstab" &&
-             discreteSolver != "bicgstabl" &&
-             discreteSolver != "gmres" &&
-             discreteSolver != "cwygmres" &&
-             discreteSolver != "tfqmr" &&
-             discreteSolver != "umfpack" )
-         {
-            std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, bicgstabl, gmres, cwygmres, tfqmr or umfpack." << std::endl;
-            return false;
-         }
-#endif
-
-         if( discreteSolver == "sor" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitSORSolverTag, ConfigTag >::run( problem, parameters );
-         if( discreteSolver == "cg" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitCGSolverTag, ConfigTag >::run( problem, parameters );
-         if( discreteSolver == "bicgstab" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitBICGStabSolverTag, ConfigTag >::run( problem, parameters );
-         if( discreteSolver == "bicgstabl" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitBICGStabLSolverTag, ConfigTag >::run( problem, parameters );
-         if( discreteSolver == "gmres" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitGMRESSolverTag, ConfigTag >::run( problem, parameters );
-         if( discreteSolver == "cwygmres" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitCWYGMRESSolverTag, ConfigTag >::run( problem, parameters );
-         if( discreteSolver == "tfqmr" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitTFQMRSolverTag, ConfigTag >::run( problem, parameters );
-#ifdef HAVE_UMFPACK
-         if( discreteSolver == "umfpack" )
-            return SolverStarterPreconditionerSetter< Problem, SemiImplicitUmfpackSolverTag, ConfigTag >::run( problem, parameters );
-#endif
-         return false;
+         typedef PDE::SemiImplicitTimeStepper< Problem > TimeStepper;  // NOTE(review): "discrete-solver" and "preconditioner" are not read here; presumably the time stepper resolves them at run time - confirm
+         SolverStarter< ConfigTag > solverStarter;
+         return solverStarter.template runPDESolver< Problem, TimeStepper >( problem, parameters );
       }
 };
 
@@ -323,81 +253,14 @@ class SolverStarterExplicitSolverSetter< Problem, ExplicitSolverTag, ConfigTag,
                        const Config::ParameterContainer& parameters )
       {
          typedef PDE::ExplicitTimeStepper< Problem, ExplicitSolverTag::template Template > TimeStepper;
-         typedef typename ExplicitSolverTag::template Template< TimeStepper > ExplicitSolver;
-         SolverStarter< ConfigTag > solverStarter;
-         return solverStarter.template runPDESolver< Problem, TimeStepper, ExplicitSolver >( problem, parameters );
-      }
-};
-
-/****
- * Setting the semi-implicit solver
- */
-
-template< typename Problem,
-          typename SemiImplicitSolverTag,
-          typename ConfigTag >
-class SolverStarterPreconditionerSetter
-{
-   public:
-      static bool run( Problem& problem,
-                       const Config::ParameterContainer& parameters )
-      {
-         const String& preconditioner = parameters.getParameter< String>( "preconditioner" );
-
-         if( preconditioner == "none" )
-            return SolverStarterLinearSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Dummy, ConfigTag >::run( problem, parameters );
-         if( preconditioner == "diagonal" )
-            return SolverStarterLinearSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Diagonal, ConfigTag >::run( problem, parameters );
-         if( preconditioner == "ilu0" )
-            return SolverStarterLinearSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::ILU0, ConfigTag >::run( problem, parameters );
-
-         std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal, ilu0." << std::endl;
-         return false;
-      }
-};
-
-template< typename Problem,
-          typename SemiImplicitSolverTag,
-          template<typename, typename, typename> class Preconditioner,
-          typename ConfigTag >
-class SolverStarterLinearSolverSetter< Problem, SemiImplicitSolverTag, Preconditioner, ConfigTag, false >
-{
-   public:
-      static bool run( Problem& problem,
-                       const Config::ParameterContainer& parameters )
-      {
-         std::cerr << "The semi-implicit solver " << parameters.getParameter< String >( "discrete-solver" ) << " is not supported." << std::endl;
-         return false;
-      }
-};
-
-template< typename Problem,
-          typename SemiImplicitSolverTag,
-          template<typename, typename, typename> class Preconditioner,
-          typename ConfigTag >
-class SolverStarterLinearSolverSetter< Problem, SemiImplicitSolverTag, Preconditioner, ConfigTag, true >
-{
-   public:
-      static bool run( Problem& problem,
-                       const Config::ParameterContainer& parameters )
-      {
-         typedef typename Problem::MatrixType MatrixType;
-         typedef typename MatrixType::RealType RealType;
-         typedef typename MatrixType::DeviceType DeviceType;
-         typedef typename MatrixType::IndexType IndexType;
-         typedef typename SemiImplicitSolverTag::template Template< MatrixType, Preconditioner< RealType, DeviceType, IndexType > > LinearSystemSolver;
-         typedef PDE::SemiImplicitTimeStepper< Problem, LinearSystemSolver > TimeStepper;
-         typedef typename TimeStepper::LinearSystemSolverType LinearSystemSolverType;
          SolverStarter< ConfigTag > solverStarter;
-         return solverStarter.template runPDESolver< Problem, TimeStepper, LinearSystemSolverType >( problem, parameters );
+         return solverStarter.template runPDESolver< Problem, TimeStepper >( problem, parameters );
       }
 };
 
-
 template< typename ConfigTag >
    template< typename Problem,
-             typename TimeStepper,
-             typename DiscreteSolver >
+             typename TimeStepper >
 bool SolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
                                                  const Config::ParameterContainer& parameters )
 {
@@ -421,8 +284,7 @@ bool SolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
    /****
     * Set-up the PDE solver
     */
-   //PDE::TimeDependentPDESolver< Problem, TimeStepper > solver;
-   typename PDE::PDESolverTypeResolver< Problem, DiscreteSolver, TimeStepper >::SolverType solver;
+   typename PDE::PDESolverTypeResolver< Problem, TimeStepper >::SolverType solver;
    solver.setComputeTimer( this->computeTimer );
    solver.setIoTimer( this->ioTimer );
    solver.setTotalTimer( this->totalTimer );
@@ -439,7 +301,6 @@ bool SolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
    if( catch_exceptions ) {
       try {
          solver.setProblem( problem );
-         //solver.setTimeStepper( timeStepper ); // TODO: BETTER FIX: This does not make sense for time independent problem
          if( ! solver.setup( parameters ) )
             return false;
       }
diff --git a/src/TNL/Solvers/Solver_impl.h b/src/TNL/Solvers/Solver_impl.h
index a0e4f1953208edb1fc89d820571aa329e376ff5c..ef865e8b7b25b6ce426c1b0d3c2918187058918d 100644
--- a/src/TNL/Solvers/Solver_impl.h
+++ b/src/TNL/Solvers/Solver_impl.h
@@ -14,8 +14,8 @@
 #include <TNL/Solvers/SolverStarter.h>
 #include <TNL/Solvers/SolverConfig.h>
 #include <TNL/Devices/Cuda.h>
-#include <TNL/Communicators/NoDistrCommunicator.h>
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 
 namespace TNL {
 namespace Solvers {
@@ -34,19 +34,15 @@ run( int argc, char* argv[] )
    configDescription.addDelimiter( "Parallelization setup:" );
    Devices::Host::configSetup( configDescription );
    Devices::Cuda::configSetup( configDescription );
-   Communicators::NoDistrCommunicator::configSetup( configDescription );
    Communicators::MpiCommunicator::configSetup( configDescription );
-   
-   Communicators::NoDistrCommunicator::Init(argc,argv);
-   Communicators::MpiCommunicator::Init(argc,argv);
+
+   Communicators::ScopedInitializer< Communicators::MpiCommunicator > mpi( argc, argv );
 
    if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
       return false;
 
    SolverInitiator< ProblemSetter, MeshConfig > solverInitiator;
-   bool ret= solverInitiator.run( parameters );
-
-	return ret;
+   return solverInitiator.run( parameters );
 };
 
 } // namespace Solvers
diff --git a/src/Tools/tnl-diff.h b/src/Tools/tnl-diff.h
index 5f4ca182a9fb23783c42f0f47ee13fcdefe9ea8e..d37eb84431373f7daeeef110252acfd8c7303d3b 100644
--- a/src/Tools/tnl-diff.h
+++ b/src/Tools/tnl-diff.h
@@ -20,7 +20,7 @@
 
 using namespace TNL;
 
-template< typename MeshPointer, typename Element, typename Real, typename Index >
+template< typename MeshPointer, typename Value, typename Real, typename Index >
 bool computeDifferenceOfMeshFunctions( const MeshPointer& meshPointer, const Config::ParameterContainer& parameters )
 {
    bool verbose = parameters. getParameter< bool >( "verbose" );
@@ -157,7 +157,7 @@ bool computeDifferenceOfMeshFunctions( const MeshPointer& meshPointer, const Con
 }
 
 
-template< typename MeshPointer, typename Element, typename Real, typename Index >
+template< typename MeshPointer, typename Value, typename Real, typename Index >
 bool computeDifferenceOfVectors( const MeshPointer& meshPointer, const Config::ParameterContainer& parameters )
 {
    bool verbose = parameters. getParameter< bool >( "verbose" );
@@ -291,21 +291,21 @@ bool computeDifferenceOfVectors( const MeshPointer& meshPointer, const Config::P
    return true;
 }
 
-template< typename MeshPointer, typename Element, typename Real, typename Index >
+template< typename MeshPointer, typename Value, typename Real, typename Index >
 bool computeDifference( const MeshPointer& meshPointer, const String& objectType, const Config::ParameterContainer& parameters )
 {
    if( objectType == "Functions::MeshFunction" ||
        objectType == "tnlMeshFunction" )  // TODO: remove deprecated type name
-      return computeDifferenceOfMeshFunctions< MeshPointer, Element, Real, Index >( meshPointer, parameters );
+      return computeDifferenceOfMeshFunctions< MeshPointer, Value, Real, Index >( meshPointer, parameters );
    if( objectType == "Containers::Vector" ||
        objectType == "tnlVector" || objectType == "tnlSharedVector" )   // TODO: remove deprecated type name
-      return computeDifferenceOfVectors< MeshPointer, Element, Real, Index >( meshPointer, parameters );
+      return computeDifferenceOfVectors< MeshPointer, Value, Real, Index >( meshPointer, parameters );
    std::cerr << "Unknown object type " << objectType << "." << std::endl;
    return false;
 }
 
 
-template< typename MeshPointer, typename Element, typename Real >
+template< typename MeshPointer, typename Value, typename Real >
 bool setIndexType( const MeshPointer& meshPointer,
                    const String& inputFileName,
                    const Containers::List< String >& parsedObjectType,
@@ -323,12 +323,12 @@ bool setIndexType( const MeshPointer& meshPointer,
 
    if( parsedObjectType[ 0 ] == "Functions::MeshFunction" ||
        parsedObjectType[ 0 ] == "tnlMeshFunction" )                      // TODO: remove deprecated type names
-      return computeDifference< MeshPointer, Element, Real, typename MeshPointer::ObjectType::IndexType >( meshPointer, parsedObjectType[ 0 ], parameters );
+      return computeDifference< MeshPointer, Value, Real, typename MeshPointer::ObjectType::IndexType >( meshPointer, parsedObjectType[ 0 ], parameters );
    
    if( indexType == "int" )
-      return computeDifference< MeshPointer, Element, Real, int >( meshPointer, parsedObjectType[ 0 ], parameters );
+      return computeDifference< MeshPointer, Value, Real, int >( meshPointer, parsedObjectType[ 0 ], parameters );
    if( indexType == "long-int" )
-      return computeDifference< MeshPointer, Element, Real, long int >( meshPointer, parsedObjectType[ 0 ], parameters );
+      return computeDifference< MeshPointer, Value, Real, long int >( meshPointer, parsedObjectType[ 0 ], parameters );
    std::cerr << "Unknown index type " << indexType << "." << std::endl;
    return false;
 }
@@ -337,11 +337,11 @@ template< typename MeshPointer >
 bool setTupleType( const MeshPointer& meshPointer,
                    const String& inputFileName,
                    const Containers::List< String >& parsedObjectType,
-                   const Containers::List< String >& parsedElementType,
+                   const Containers::List< String >& parsedValueType,
                    const Config::ParameterContainer& parameters )
 {
-   int dimensions = atoi( parsedElementType[ 1 ].getString() );
-   String dataType = parsedElementType[ 2 ];
+   int dimensions = atoi( parsedValueType[ 1 ].getString() );
+   String dataType = parsedValueType[ 2 ];
    if( dataType == "float" )
       switch( dimensions )
       {
@@ -385,7 +385,7 @@ bool setTupleType( const MeshPointer& meshPointer,
 }
 
 template< typename MeshPointer >
-bool setElementType( const MeshPointer& meshPointer,
+bool setValueType( const MeshPointer& meshPointer,
                      const String& inputFileName,
                      const Containers::List< String >& parsedObjectType,
                      const Config::ParameterContainer& parameters )
@@ -411,14 +411,14 @@ bool setElementType( const MeshPointer& meshPointer,
       return setIndexType< MeshPointer, double, double >( meshPointer, inputFileName, parsedObjectType, parameters );
    if( elementType == "long double" )
       return setIndexType< MeshPointer, long double, long double >( meshPointer, inputFileName, parsedObjectType, parameters );
-   Containers::List< String > parsedElementType;
-   if( ! parseObjectType( elementType, parsedElementType ) )
+   Containers::List< String > parsedValueType;
+   if( ! parseObjectType( elementType, parsedValueType ) )
    {
       std::cerr << "Unable to parse object type " << elementType << "." << std::endl;
       return false;
    }
-   if( parsedElementType[ 0 ] == "Containers::StaticVector" )
-      return setTupleType< MeshPointer >( meshPointer, inputFileName, parsedObjectType, parsedElementType, parameters );
+   if( parsedValueType[ 0 ] == "Containers::StaticVector" )
+      return setTupleType< MeshPointer >( meshPointer, inputFileName, parsedObjectType, parsedValueType, parameters );
 
    std::cerr << "Unknown element type " << elementType << "." << std::endl;
    return false;
@@ -436,7 +436,7 @@ bool processFiles( const Config::ParameterContainer& parameters )
     */
    String meshFile = parameters. getParameter< String >( "mesh" );
    
-   typedef SharedPointer< Mesh > MeshPointer;
+   typedef Pointers::SharedPointer<  Mesh > MeshPointer;
 
    MeshPointer meshPointer;
    if( meshFile != "" )
@@ -461,7 +461,7 @@ bool processFiles( const Config::ParameterContainer& parameters )
       std::cerr << "Unable to parse object type " << objectType << "." << std::endl;
       return false;
    }
-   setElementType< MeshPointer >( meshPointer, inputFiles[ 0 ], parsedObjectType, parameters );
+   setValueType< MeshPointer >( meshPointer, inputFiles[ 0 ], parsedObjectType, parameters );
    return true;
 }
 
diff --git a/src/Tools/tnl-image-converter.cpp b/src/Tools/tnl-image-converter.cpp
index 3c5d5aa5503a3d926a27b1980ce83f4b3129a950..29c633058d69250965293681a2d010b9349eb5e4 100644
--- a/src/Tools/tnl-image-converter.cpp
+++ b/src/Tools/tnl-image-converter.cpp
@@ -12,7 +12,7 @@
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/FileName.h>
 #include <TNL/Meshes/Grid.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Images/PGMImage.h>
 #include <TNL/Images/PNGImage.h>
@@ -48,7 +48,7 @@ bool processImages( const Config::ParameterContainer& parameters )
     bool verbose = parameters.getParameter< bool >( "verbose" );
  
     using GridType = Meshes::Grid< 2, Real, Devices::Host, int >;
-    using GridPointer = SharedPointer< GridType >;
+    using GridPointer = Pointers::SharedPointer< GridType >;
     using MeshFunctionType = Functions::MeshFunction< GridType >;
     GridPointer grid;
     MeshFunctionType meshFunction;
diff --git a/src/Tools/tnl-init.cpp b/src/Tools/tnl-init.cpp
index ac72f7a13283958fd407c4e281c1baeb4a038996..7dd7032810794f5f2ee43023472be6773fcb2de6 100644
--- a/src/Tools/tnl-init.cpp
+++ b/src/Tools/tnl-init.cpp
@@ -17,8 +17,8 @@
 #include <TNL/Meshes/DummyMesh.h>
 #include <TNL/Meshes/Grid.h>
 
-#include <TNL/Communicators/NoDistrCommunicator.h>
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 
 
 using namespace TNL;
@@ -51,18 +51,14 @@ void setupConfig( Config::ConfigDescription& config )
 
 int main( int argc, char* argv[] )
 {
-
    Config::ParameterContainer parameters;
    Config::ConfigDescription configDescription;
 
    setupConfig( configDescription );
-   
-   Communicators::NoDistrCommunicator::configSetup( configDescription );
    Communicators::MpiCommunicator::configSetup( configDescription );
-   
-   Communicators::NoDistrCommunicator::Init(argc,argv);
-   Communicators::MpiCommunicator::Init(argc,argv);   
- 
+
+   Communicators::ScopedInitializer< Communicators::MpiCommunicator > mpi(argc, argv);
+
    if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
       return EXIT_FAILURE;
 
@@ -83,9 +79,5 @@ int main( int argc, char* argv[] )
    if( ! resolveMeshType( parsedMeshType, parameters ) )
       return EXIT_FAILURE;
 
-#ifdef HAVE_MPI
-   Communicators::MpiCommunicator::Finalize();
-#endif
-      
    return EXIT_SUCCESS;
 }
diff --git a/src/Tools/tnl-init.h b/src/Tools/tnl-init.h
index 6b554a0cde0b35c5d336e2b2366f36ff8e43a1be..3f2a8f15fd01df8c61785ffae155ccf84ac86ddd 100644
--- a/src/Tools/tnl-init.h
+++ b/src/Tools/tnl-init.h
@@ -39,7 +39,7 @@ bool renderFunction( const Config::ParameterContainer& parameters )
    using namespace  Meshes::DistributedMeshes;
    using DistributedGridType = Meshes::DistributedMeshes::DistributedMesh<MeshType>;
    DistributedGridType distributedMesh;
-   SharedPointer< MeshType > meshPointer;
+   Pointers::SharedPointer< MeshType > meshPointer;
    MeshType globalMesh;
 
    if(CommunicatorType::isDistributed())
@@ -66,14 +66,14 @@ bool renderFunction( const Config::ParameterContainer& parameters )
     }
 
    typedef Functions::TestFunction< MeshType::getMeshDimension(), RealType > FunctionType;
-   typedef SharedPointer< FunctionType, typename MeshType::DeviceType > FunctionPointer;
+   typedef Pointers::SharedPointer<  FunctionType, typename MeshType::DeviceType > FunctionPointer;
    FunctionPointer function;
    std::cout << "Setting up the function ... " << std::endl;
    if( ! function->setup( parameters, "" ) )
       return false;
    std::cout << "done." << std::endl;
    typedef Functions::MeshFunction< MeshType, MeshType::getMeshDimension() > MeshFunctionType;
-   typedef SharedPointer< MeshFunctionType, typename MeshType::DeviceType > MeshFunctionPointer;
+   typedef Pointers::SharedPointer<  MeshFunctionType, typename MeshType::DeviceType > MeshFunctionPointer;
    MeshFunctionPointer meshFunction( meshPointer );
    //if( ! discreteFunction.setSize( mesh.template getEntitiesCount< typename MeshType::Cell >() ) )
    //   return false;
diff --git a/src/Tools/tnl-lattice-init.h b/src/Tools/tnl-lattice-init.h
index 269ff5201d25567a2595a2fa17fe71745249a072..b22e71f96ad7c8e31e18cc22a35105cab31723e8 100644
--- a/src/Tools/tnl-lattice-init.h
+++ b/src/Tools/tnl-lattice-init.h
@@ -23,8 +23,8 @@ bool performExtrude( const Config::ParameterContainer& parameters,
                      MeshFunction& f,
                      const ProfileMeshFunction& profile )
 {
-   using MeshPointer = SharedPointer< typename MeshFunction::MeshType >;
-   using ProfileMeshPointer = SharedPointer< typename ProfileMeshFunction::MeshType >;
+   using MeshPointer = Pointers::SharedPointer< typename MeshFunction::MeshType >;
+   using ProfileMeshPointer = Pointers::SharedPointer< typename ProfileMeshFunction::MeshType >;
    using ProfileMeshType = typename ProfileMeshFunction::MeshType;
    using MeshType = typename MeshFunction::MeshType;
    using RealType = typename MeshFunction::RealType;
@@ -177,7 +177,7 @@ bool
 readProfileMeshFunction( const Config::ParameterContainer& parameters )
 {
    String profileMeshFile = parameters.getParameter< String >( "profile-mesh" );
-   using ProfileMeshPointer = SharedPointer< typename ProfileMeshFunction::MeshType >;
+   using ProfileMeshPointer = Pointers::SharedPointer< typename ProfileMeshFunction::MeshType >;
    ProfileMeshPointer profileMesh;
    if( ! profileMesh->load( profileMeshFile ) )
    {
@@ -192,7 +192,7 @@ readProfileMeshFunction( const Config::ParameterContainer& parameters )
       return false;
    }
    String meshFile = parameters.getParameter< String >( "mesh" );
-   using MeshPointer = SharedPointer< Mesh >;
+   using MeshPointer = Pointers::SharedPointer< Mesh >;
    MeshPointer mesh;
    if( ! mesh->load( meshFile ) )
    {
diff --git a/src/Tools/tnl-view.cpp b/src/Tools/tnl-view.cpp
index 25e38bc65d4f831379827118af9856a898e737de..f426b6fe0c6bf36318e3e245ecce184b35ae0d0f 100644
--- a/src/Tools/tnl-view.cpp
+++ b/src/Tools/tnl-view.cpp
@@ -80,17 +80,6 @@ void setupConfig( Config::ConfigDescription& config )
       config.addEntryEnum  < String >             ( "gnuplot" );
       config.addEntryEnum  < String >             ( "vtk" );
    config.addEntry        < int >                 ( "verbose", "Set the verbosity of the program.", 1 );
-
-   config.addDelimiter( "Matrix settings:" );
-   config.addEntry        < String >           ( "matrix-format", "Matrix format to be drawn." );
-      config.addEntryEnum  < String >             ( "csr" );
-      config.addEntryEnum  < String >             ( "ellpack" );
-      config.addEntryEnum  < String >             ( "sliced-ellpack" );
-      config.addEntryEnum  < String >             ( "chunked-ellpack" );
-   config.addEntry        < int >                 ( "matrix-slice-size", "Sets the slice size of the matrix.", 0 );
-   config.addEntry        < int >                 ( "desired-matrix-chunk-size", "Sets desired chunk size for the Chunked Ellpack format.");
-   config.addEntry        < int >                 ( "cuda-block-size", "Sets CUDA block size for the Chunked Ellpack format." );
-   config.addEntry       < bool >                 ( "sort-matrix", "Sort the matrix rows decreasingly by the number of the non-zero elements.", false );
 }
 
 int main( int argc, char* argv[] )
diff --git a/src/Tools/tnl-view.h b/src/Tools/tnl-view.h
index ed81bdf0878f709ee47878b7b4cbfa0add139f7f..c231d606dd064a6e96730d7698d0043871bda693 100644
--- a/src/Tools/tnl-view.h
+++ b/src/Tools/tnl-view.h
@@ -220,7 +220,7 @@ bool setVectorFieldSize( const MeshPointer& meshPointer,
    return false;
 }
 
-template< typename MeshPointer, typename Element, typename Real, typename Index, int Dimension >
+template< typename MeshPointer, typename Value, typename Real, typename Index, int Dimension >
 bool convertObject( const MeshPointer& meshPointer,
                     const String& inputFileName,
                     const Containers::List< String >& parsedObjectType,
@@ -243,11 +243,11 @@ bool convertObject( const MeshPointer& meshPointer,
    {
       using MeshType = typename MeshPointer::ObjectType;
       // FIXME: why is MeshType::GlobalIndexType not the same as Index?
-//      Containers::Vector< Element, Devices::Host, Index > vector;
-      Containers::Vector< Element, Devices::Host, typename MeshType::GlobalIndexType > vector;
+//      Containers::Vector< Value, Devices::Host, Index > vector;
+      Containers::Vector< Value, Devices::Host, typename MeshType::GlobalIndexType > vector;
       if( ! vector.load( inputFileName ) )
          return false;
-      Functions::MeshFunction< MeshType, MeshType::getMeshDimension(), Element > mf;
+      Functions::MeshFunction< MeshType, MeshType::getMeshDimension(), Value > mf;
       mf.bind( meshPointer, vector );
       if( ! mf.write( outputFileName, outputFormat ) )
          return false;
@@ -257,23 +257,22 @@ bool convertObject( const MeshPointer& meshPointer,
        parsedObjectType[ 0 ] == "tnlMultiVector" ||      // TODO: remove deprecated type names  
        parsedObjectType[ 0 ] == "tnlSharedMultiVector" ) //
    {
-      Containers::MultiVector< Dimension, Element, Devices::Host, Index > multiVector;
+      Containers::MultiVector< Dimension, Value, Devices::Host, Index > multiVector;
       if( ! multiVector. load( inputFileName ) )
          return false;
       typedef Meshes::Grid< Dimension, Real, Devices::Host, Index > GridType;
       typedef typename GridType::PointType PointType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      GridType grid;
-      grid. setDomain( PointType( 0.0 ), PointType( 1.0 ) );
-      grid. setDimensions( CoordinatesType( multiVector. getDimensions() ) );
-      const Real spaceStep = grid. getSpaceSteps(). x();
+//      GridType grid;
+//      grid. setDomain( PointType( 0.0 ), PointType( 1.0 ) );
+//      grid. setDimensions( CoordinatesType( multiVector. getDimensions() ) );
 //      if( ! grid. write( multiVector, outputFileName, outputFormat ) )
          return false;
    }
    return true;
 }
 
-template< typename MeshPointer, typename Element, typename Real, typename Index >
+template< typename MeshPointer, typename Value, typename Real, typename Index >
 bool setDimension( const MeshPointer& meshPointer,
                     const String& inputFileName,
                     const Containers::List< String >& parsedObjectType,
@@ -291,17 +290,17 @@ bool setDimension( const MeshPointer& meshPointer,
    switch( dimensions )
    {
       case 1:
-         return convertObject< MeshPointer, Element, Real, Index, 1 >( meshPointer, inputFileName, parsedObjectType, parameters );
+         return convertObject< MeshPointer, Value, Real, Index, 1 >( meshPointer, inputFileName, parsedObjectType, parameters );
       case 2:
-         return convertObject< MeshPointer, Element, Real, Index, 2 >( meshPointer, inputFileName, parsedObjectType, parameters );
+         return convertObject< MeshPointer, Value, Real, Index, 2 >( meshPointer, inputFileName, parsedObjectType, parameters );
       case 3:
-         return convertObject< MeshPointer, Element, Real, Index, 3 >( meshPointer, inputFileName, parsedObjectType, parameters );
+         return convertObject< MeshPointer, Value, Real, Index, 3 >( meshPointer, inputFileName, parsedObjectType, parameters );
    }
    std::cerr << "Cannot convert objects with " << dimensions << " dimensions." << std::endl;
    return false;
 }
 
-template< typename MeshPointer, typename Element, typename Real >
+template< typename MeshPointer, typename Value, typename Real >
 bool setIndexType( const MeshPointer& meshPointer,
                    const String& inputFileName,
                    const Containers::List< String >& parsedObjectType,
@@ -318,9 +317,9 @@ bool setIndexType( const MeshPointer& meshPointer,
       indexType = parsedObjectType[ 3 ];
 
    if( indexType == "int" )
-      return setDimension< MeshPointer, Element, Real, int >( meshPointer, inputFileName, parsedObjectType, parameters );
+      return setDimension< MeshPointer, Value, Real, int >( meshPointer, inputFileName, parsedObjectType, parameters );
    if( indexType == "long-int" )
-      return setDimension< MeshPointer, Element, Real, long int >( meshPointer, inputFileName, parsedObjectType, parameters );
+      return setDimension< MeshPointer, Value, Real, long int >( meshPointer, inputFileName, parsedObjectType, parameters );
    std::cerr << "Unknown index type " << indexType << "." << std::endl;
    return false;
 }
@@ -329,11 +328,11 @@ template< typename MeshPointer >
 bool setTupleType( const MeshPointer& meshPointer,
                    const String& inputFileName,
                    const Containers::List< String >& parsedObjectType,
-                   const Containers::List< String >& parsedElementType,
+                   const Containers::List< String >& parsedValueType,
                    const Config::ParameterContainer& parameters )
 {
-   int dimensions = atoi( parsedElementType[ 1 ].getString() );
-   String dataType = parsedElementType[ 2 ];
+   int dimensions = atoi( parsedValueType[ 1 ].getString() );
+   String dataType = parsedValueType[ 2 ];
    if( dataType == "float" )
       switch( dimensions )
       {
@@ -377,7 +376,7 @@ bool setTupleType( const MeshPointer& meshPointer,
 }
 
 template< typename MeshPointer >
-bool setElementType( const MeshPointer& meshPointer,
+bool setValueType( const MeshPointer& meshPointer,
                      const String& inputFileName,
                      const Containers::List< String >& parsedObjectType,
                      const Config::ParameterContainer& parameters )
@@ -401,15 +400,15 @@ bool setElementType( const MeshPointer& meshPointer,
       return setIndexType< MeshPointer, double, double >( meshPointer, inputFileName, parsedObjectType, parameters );
    if( elementType == "long double" )
       return setIndexType< MeshPointer, long double, long double >( meshPointer, inputFileName, parsedObjectType, parameters );
-   Containers::List< String > parsedElementType;
-   if( ! parseObjectType( elementType, parsedElementType ) )
+   Containers::List< String > parsedValueType;
+   if( ! parseObjectType( elementType, parsedValueType ) )
    {
       std::cerr << "Unable to parse object type " << elementType << "." << std::endl;
       return false;
    }
-   if( parsedElementType[ 0 ] == "Containers::StaticVector" ||
-       parsedElementType[ 0 ] == "Containers::StaticVector" )               // TODO: remove deprecated type names
-      return setTupleType< MeshPointer >( meshPointer, inputFileName, parsedObjectType, parsedElementType, parameters );
+   if( parsedValueType[ 0 ] == "Containers::StaticVector" ||
+       parsedValueType[ 0 ] == "Containers::StaticVector" )               // TODO: remove deprecated type names
+      return setTupleType< MeshPointer >( meshPointer, inputFileName, parsedObjectType, parsedValueType, parameters );
 
    std::cerr << "Unknown element type " << elementType << "." << std::endl;
    return false;
@@ -423,7 +422,7 @@ struct FilesProcessor
       int verbose = parameters. getParameter< int >( "verbose");
       String meshFile = parameters. getParameter< String >( "mesh" );
 
-      typedef SharedPointer< Mesh > MeshPointer;
+      typedef Pointers::SharedPointer<  Mesh > MeshPointer;
       MeshPointer meshPointer;
       
       if( meshFile != "" )
@@ -481,7 +480,7 @@ struct FilesProcessor
                 parsedObjectType[ 0 ] == "tnlSharedMultiVector" ||               // 
                 parsedObjectType[ 0 ] == "tnlSharedVector" ||                    //
                 parsedObjectType[ 0 ] == "tnlVector" )                           //
-               setElementType< MeshPointer >( meshPointer, inputFiles[ i ], parsedObjectType, parameters );
+               setValueType< MeshPointer >( meshPointer, inputFiles[ i ], parsedObjectType, parameters );
             if( parsedObjectType[ 0 ] == "Functions::MeshFunction" ||
                 parsedObjectType[ 0 ] == "tnlMeshFunction" )                     // TODO: remove deprecated type names
                setMeshFunction< MeshPointer >( meshPointer, inputFiles[ i ], parsedObjectType, parameters );
diff --git a/src/UnitTests/AssertCudaTest.cu b/src/UnitTests/AssertCudaTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..2aa9705c5aa8319aee7ce5ee12c9bd6a26d857f0
--- /dev/null
+++ b/src/UnitTests/AssertCudaTest.cu
@@ -0,0 +1,114 @@
+/***************************************************************************
+                          AssertCudaTest.cu  -  description
+                             -------------------
+    begin                : Sep 7, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#ifdef NDEBUG
+   #undef NDEBUG
+#endif
+
+#include <TNL/Assert.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaRuntimeError.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+using namespace TNL;
+
+#ifdef HAVE_CUDA
+
+#define WRAP_ASSERT( suffix, statement, not_failing )             \
+__global__                                                        \
+void kernel_##suffix( int* output )                               \
+{                                                                 \
+   const bool tr = true;                                          \
+   const bool fa = false;                                         \
+   const int two = 2;                                             \
+   const int ten = 10;                                            \
+   /* pointers */                                                 \
+   const double* data_null = nullptr;                             \
+   const double** data_full = &data_null;                         \
+                                                                  \
+   statement                                                      \
+                                                                  \
+   /* actually do something to test if the execution control gets here */ \
+   output[0] = 1;                                                 \
+}                                                                 \
+                                                                  \
+TEST( AssertCudaTest, suffix )                                    \
+{                                                                 \
+   int* output_cuda;                                              \
+   int output_host = 0;                                           \
+                                                                  \
+   cudaMalloc( (void**)&output_cuda, sizeof(int) );               \
+   cudaMemcpy( output_cuda, &output_host, sizeof(int), cudaMemcpyHostToDevice );    \
+                                                                  \
+   kernel_##suffix<<<1, 1>>>(output_cuda);                        \
+   cudaDeviceSynchronize();                                       \
+   if( not_failing ) {                                            \
+      EXPECT_NO_THROW( TNL_CHECK_CUDA_DEVICE; );                  \
+      cudaMemcpy( &output_host, output_cuda, sizeof(int), cudaMemcpyDeviceToHost );    \
+      cudaFree( output_cuda );                                    \
+      EXPECT_EQ( output_host, 1 );                                \
+   }                                                              \
+   else                                                           \
+      EXPECT_THROW( TNL_CHECK_CUDA_DEVICE;,                       \
+                    TNL::Exceptions::CudaRuntimeError );          \
+}                                                                 \
+
+
+// not failing statements:
+WRAP_ASSERT( test1,  TNL_ASSERT_TRUE( true, "true is true" );, true );
+WRAP_ASSERT( test2,  TNL_ASSERT_TRUE( tr, "true is true" );, true );
+WRAP_ASSERT( test3,  TNL_ASSERT_FALSE( false, "false is false" );, true );
+WRAP_ASSERT( test4,  TNL_ASSERT_FALSE( fa, "false is false" );, true );
+
+WRAP_ASSERT( test5,  TNL_ASSERT_EQ( two, 2, "two is 2" );, true );
+WRAP_ASSERT( test6,  TNL_ASSERT_NE( ten, 2, "ten is not 2" );, true );
+WRAP_ASSERT( test7,  TNL_ASSERT_LT( two, 10, "two < 10" );, true );
+WRAP_ASSERT( test8,  TNL_ASSERT_LE( two, 10, "two <= 10" );, true );
+WRAP_ASSERT( test9,  TNL_ASSERT_LE( two, 2, "two <= 2" );, true );
+WRAP_ASSERT( test10, TNL_ASSERT_GT( ten, 2, "ten > 2" );, true );
+WRAP_ASSERT( test11, TNL_ASSERT_GE( ten, 10, "ten >= 10" );, true );
+WRAP_ASSERT( test12, TNL_ASSERT_GE( ten, 2, "ten >= 2" );, true );
+
+WRAP_ASSERT( test13, TNL_ASSERT_FALSE( data_null, "nullptr is false" );, true );
+WRAP_ASSERT( test14, TNL_ASSERT_TRUE( data_full, "non-nullptr is true" );, true );
+
+// errors:
+WRAP_ASSERT( test15, TNL_ASSERT_TRUE( false, "false is true" );, false );
+WRAP_ASSERT( test16, TNL_ASSERT_TRUE( fa, "false is true" );, false );
+WRAP_ASSERT( test17, TNL_ASSERT_FALSE( true, "true is false" );, false );
+WRAP_ASSERT( test18, TNL_ASSERT_FALSE( tr, "true is false" );, false );
+
+WRAP_ASSERT( test19, TNL_ASSERT_NE( two, 2, "two != 2" );, false );
+WRAP_ASSERT( test20, TNL_ASSERT_EQ( ten, 2, "ten == 2" );, false );
+WRAP_ASSERT( test21, TNL_ASSERT_GE( two, 10, "two >= 10" );, false );
+WRAP_ASSERT( test22, TNL_ASSERT_GT( two, 10, "two > 10" );, false );
+WRAP_ASSERT( test23, TNL_ASSERT_GT( two, 2, "two > 2" );, false );
+WRAP_ASSERT( test24, TNL_ASSERT_LE( ten, 2, "ten <= 2" );, false );
+WRAP_ASSERT( test25, TNL_ASSERT_LT( ten, 10, "ten < 10" );, false );
+WRAP_ASSERT( test26, TNL_ASSERT_LT( ten, 2, "ten < 2" );, false );
+
+WRAP_ASSERT( test27, TNL_ASSERT_TRUE( data_null, "nullptr is true" );, false );
+WRAP_ASSERT( test28, TNL_ASSERT_FALSE( data_full, "non-nullptr is false" );, false );
+
+#endif
+#endif
+
+#include "GtestMissingError.h"
+int main( int argc, char* argv[] )
+{
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+#else
+   throw GtestMissingError();
+#endif
+}
diff --git a/src/UnitTests/AssertTest.cpp b/src/UnitTests/AssertTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea20f7957a3fb4103b482d6a7650f4d3902de191
--- /dev/null
+++ b/src/UnitTests/AssertTest.cpp
@@ -0,0 +1,80 @@
+/***************************************************************************
+                          AssertTest.cpp  -  description
+                             -------------------
+    begin                : Sep 7, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#ifdef NDEBUG
+   #undef NDEBUG
+#endif
+
+#include <TNL/Assert.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+using namespace TNL;
+
+TEST( AssertTest, basicTest )  // runs each TNL_ASSERT_* macro on satisfied and on violated conditions
+{
+   const bool tr = true;    // named operands: the macros are checked with variables as well as literals
+   const bool fa = false;
+   const int two = 2;
+   const int ten = 10;
+
+   // satisfied conditions: none of these assertions may throw
+   EXPECT_NO_THROW( TNL_ASSERT_TRUE( true, "true is true" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_TRUE( tr, "true is true" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_FALSE( false, "false is false" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_FALSE( fa, "false is false" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_EQ( two, 2, "two is 2" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_NE( ten, 2, "ten is not 2" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_LT( two, 10, "two < 10" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_LE( two, 10, "two <= 10" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_LE( two, 2, "two <= 2" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_GT( ten, 2, "ten > 2" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_GE( ten, 10, "ten >= 10" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_GE( ten, 2, "ten >= 2" ); );
+
+   // violated conditions: each of these assertions is expected to throw
+   EXPECT_ANY_THROW( TNL_ASSERT_TRUE( false, "false is true" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_TRUE( fa, "false is true" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_FALSE( true, "true is false" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_FALSE( tr, "true is false" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_NE( two, 2, "two != 2" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_EQ( ten, 2, "ten == 2" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_GE( two, 10, "two >= 10" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_GT( two, 10, "two > 10" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_GT( two, 2, "two > 2" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_LE( ten, 2, "ten <= 2" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_LT( ten, 10, "ten < 10" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_LT( ten, 2, "ten < 2" ); );
+
+   // pointers used as conditions: one null pointer and one non-null pointer
+   const double* data_null = nullptr;
+   const double** data_full = &data_null;  // address of the local above, hence non-null
+
+   // satisfied conditions: a null pointer acts as false, a non-null pointer as true
+   EXPECT_NO_THROW( TNL_ASSERT_FALSE( data_null, "nullptr is false" ); );
+   EXPECT_NO_THROW( TNL_ASSERT_TRUE( data_full, "non-nullptr is true" ); );
+
+   // violated conditions: each of these assertions is expected to throw
+   EXPECT_ANY_THROW( TNL_ASSERT_TRUE( data_null, "nullptr is true" ); );
+   EXPECT_ANY_THROW( TNL_ASSERT_FALSE( data_full, "non-nullptr is false" ); );
+}
+#endif
+
+#include "GtestMissingError.h"
+int main( int argc, char* argv[] )  // entry point of the test executable
+{
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );  // hands the command-line arguments to Google Test
+   return RUN_ALL_TESTS();                    // the result of the test run is the return value
+#else
+   throw GtestMissingError();  // compiled without HAVE_GTEST: no tests exist, so report it by throwing
+#endif
+}
diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt
index a104de45b14615878736e30827c38fdd7367dd37..e7132a722af5e65a9f2d25bc089cf81cc88f9041 100644
--- a/src/UnitTests/CMakeLists.txt
+++ b/src/UnitTests/CMakeLists.txt
@@ -1,29 +1,34 @@
-if( ${WITH_TESTS} )
-
 ADD_SUBDIRECTORY( Containers )
 ADD_SUBDIRECTORY( Functions )
 ADD_SUBDIRECTORY( Matrices )
-ADD_SUBDIRECTORY( Mpi )
 ADD_SUBDIRECTORY( Meshes )
+ADD_SUBDIRECTORY( Pointers )
 
-ADD_EXECUTABLE( UniquePointerTest UniquePointerTest.cpp )
-TARGET_COMPILE_OPTIONS( UniquePointerTest PRIVATE ${CXX_TESTS_FLAGS} )
-TARGET_LINK_LIBRARIES( UniquePointerTest
+ADD_EXECUTABLE( AssertTest AssertTest.cpp )
+TARGET_COMPILE_OPTIONS( AssertTest PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( AssertTest
                            ${GTEST_BOTH_LIBRARIES}
                            tnl )
 
-IF( BUILD_CUDA )
+if( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( AssertCudaTest AssertCudaTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( AssertCudaTest
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl )
+endif()
+
+if( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( FileTest FileTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( FileTest
                               ${GTEST_BOTH_LIBRARIES}
                               tnl )
-ELSE(  BUILD_CUDA )               
+else()
    ADD_EXECUTABLE( FileTest FileTest.cpp )
    TARGET_COMPILE_OPTIONS( FileTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( FileTest
                               ${GTEST_BOTH_LIBRARIES}
                               tnl )
-ENDIF( BUILD_CUDA )
+endif()
 
 ADD_EXECUTABLE( StringTest StringTest.cpp )
 TARGET_COMPILE_OPTIONS( StringTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -43,10 +48,11 @@ TARGET_LINK_LIBRARIES( SaveAndLoadMeshfunctionTest
                            ${GTEST_BOTH_LIBRARIES} 
                            tnl )
 
+ADD_TEST( AssertTest ${EXECUTABLE_OUTPUT_PATH}/AssertTest${CMAKE_EXECUTABLE_SUFFIX} )
+if( BUILD_CUDA )
+   ADD_TEST( AssertCudaTest ${EXECUTABLE_OUTPUT_PATH}/AssertCudaTest${CMAKE_EXECUTABLE_SUFFIX} )
+endif()
 ADD_TEST( FileTest ${EXECUTABLE_OUTPUT_PATH}/FileTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StringTest ${EXECUTABLE_OUTPUT_PATH}/StringTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ObjectTest ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( UniquePointerTest ${EXECUTABLE_OUTPUT_PATH}/UniquePointerTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SaveAndLoadMeshfunctionTest ${EXECUTABLE_OUTPUT_PATH}/SaveAndLoadMeshfunctionTest${CMAKE_EXECUTABLE_SUFFIX} )
-
-endif( ${WITH_TESTS} )
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h
index 58606def9f1a1435f319ec390ed97ddad6cf07e1..109e947649bec236bcb7b7e64bcc84eacd44aef7 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Containers/ArrayOperationsTest.h
@@ -23,23 +23,23 @@ using namespace TNL::Containers::Algorithms;
 constexpr int ARRAY_TEST_SIZE = 5000;
 
 // test fixture for typed tests
-template< typename Element >
+template< typename Value >
 class ArrayOperationsTest : public ::testing::Test
 {
 protected:
-   using ElementType = Element;
+   using ValueType = Value;
 };
 
 // types for which ArrayTest is instantiated
-using ElementTypes = ::testing::Types< short int, int, long, float, double >;
+using ValueTypes = ::testing::Types< short int, int, long, float, double >;
 
-TYPED_TEST_CASE( ArrayOperationsTest, ElementTypes );
+TYPED_TEST_CASE( ArrayOperationsTest, ValueTypes );
 
 TYPED_TEST( ArrayOperationsTest, allocateMemory_host )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
 
-   ElementType* data;
+   ValueType* data;
    ArrayOperations< Devices::Host >::allocateMemory( data, ARRAY_TEST_SIZE );
    ASSERT_NE( data, nullptr );
 
@@ -48,13 +48,13 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_host )
 
 TYPED_TEST( ArrayOperationsTest, setMemoryElement_host )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *data;
+   ValueType *data;
    ArrayOperations< Devices::Host >::allocateMemory( data, size );
    for( int i = 0; i < size; i++ ) {
-      ArrayOperations< Devices::Host >::setMemoryElement( data + i, (ElementType) i );
+      ArrayOperations< Devices::Host >::setMemoryElement( data + i, (ValueType) i );
       EXPECT_EQ( data[ i ], i );
       EXPECT_EQ( ArrayOperations< Devices::Host >::getMemoryElement( data + i ), i );
    }
@@ -63,12 +63,12 @@ TYPED_TEST( ArrayOperationsTest, setMemoryElement_host )
 
 TYPED_TEST( ArrayOperationsTest, setMemory_host )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *data;
+   ValueType *data;
    ArrayOperations< Devices::Host >::allocateMemory( data, size );
-   ArrayOperations< Devices::Host >::setMemory( data, (ElementType) 13, size );
+   ArrayOperations< Devices::Host >::setMemory( data, (ValueType) 13, size );
    for( int i = 0; i < size; i ++ )
       EXPECT_EQ( data[ i ], 13 );
    ArrayOperations< Devices::Host >::freeMemory( data );
@@ -76,14 +76,14 @@ TYPED_TEST( ArrayOperationsTest, setMemory_host )
 
 TYPED_TEST( ArrayOperationsTest, copyMemory_host )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *data1, *data2;
+   ValueType *data1, *data2;
    ArrayOperations< Devices::Host >::allocateMemory( data1, size );
    ArrayOperations< Devices::Host >::allocateMemory( data2, size );
-   ArrayOperations< Devices::Host >::setMemory( data1, (ElementType) 13, size );
-   ArrayOperations< Devices::Host >::copyMemory< ElementType, ElementType >( data2, data1, size );
+   ArrayOperations< Devices::Host >::setMemory( data1, (ValueType) 13, size );
+   ArrayOperations< Devices::Host >::copyMemory< ValueType, ValueType >( data2, data1, size );
    for( int i = 0; i < size; i ++ )
       EXPECT_EQ( data1[ i ], data2[ i ]);
    ArrayOperations< Devices::Host >::freeMemory( data1 );
@@ -92,7 +92,7 @@ TYPED_TEST( ArrayOperationsTest, copyMemory_host )
 
 TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversion_host )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
    int *data1;
@@ -109,17 +109,17 @@ TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversion_host )
 
 TYPED_TEST( ArrayOperationsTest, compareMemory_host )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *data1, *data2;
+   ValueType *data1, *data2;
    ArrayOperations< Devices::Host >::allocateMemory( data1, size );
    ArrayOperations< Devices::Host >::allocateMemory( data2, size );
-   ArrayOperations< Devices::Host >::setMemory( data1, (ElementType) 7, size );
-   ArrayOperations< Devices::Host >::setMemory( data2, (ElementType) 0, size );
-   EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compareMemory< ElementType, ElementType >( data1, data2, size ) ) );
-   ArrayOperations< Devices::Host >::setMemory( data2, (ElementType) 7, size );
-   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< ElementType, ElementType >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::setMemory( data1, (ValueType) 7, size );
+   ArrayOperations< Devices::Host >::setMemory( data2, (ValueType) 0, size );
+   EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compareMemory< ValueType, ValueType >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::setMemory( data2, (ValueType) 7, size );
+   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< ValueType, ValueType >( data1, data2, size ) ) );
    ArrayOperations< Devices::Host >::freeMemory( data1 );
    ArrayOperations< Devices::Host >::freeMemory( data2 );
 }
@@ -204,10 +204,10 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host )
 #ifdef HAVE_CUDA
 TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType* data;
+   ValueType* data;
    ArrayOperations< Devices::Cuda >::allocateMemory( data, size );
    ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
    ASSERT_NE( data, nullptr );
@@ -218,20 +218,20 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda )
 
 TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType* data;
+   ValueType* data;
    ArrayOperations< Devices::Cuda >::allocateMemory( data, size );
    ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
 
    for( int i = 0; i < size; i++ )
-      ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], (ElementType) i );
+      ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], (ValueType) i );
 
    for( int i = 0; i < size; i++ )
    {
-      ElementType d;
-      ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( ElementType ), cudaMemcpyDeviceToHost ), cudaSuccess );
+      ValueType d;
+      ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( ValueType ), cudaMemcpyDeviceToHost ), cudaSuccess );
       EXPECT_EQ( d, i );
       EXPECT_EQ( ArrayOperations< Devices::Cuda >::getMemoryElement( &data[ i ] ), i );
    }
@@ -242,16 +242,16 @@ TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda )
 
 TYPED_TEST( ArrayOperationsTest, setMemory_cuda )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *hostData, *deviceData;
+   ValueType *hostData, *deviceData;
    ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData, (ElementType) 0, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ElementType) 13, size );
+   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 0, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 13, size );
    ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ElementType, ElementType >( hostData, deviceData, size );
+   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ValueType, ValueType >( hostData, deviceData, size );
    ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( hostData[ i ], 13 );
@@ -261,19 +261,19 @@ TYPED_TEST( ArrayOperationsTest, setMemory_cuda )
 
 TYPED_TEST( ArrayOperationsTest, copyMemory_cuda )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *hostData, *hostData2, *deviceData, *deviceData2;
+   ValueType *hostData, *hostData2, *deviceData, *deviceData2;
    ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData, (ElementType) 13, size );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< ElementType >( deviceData, hostData, size );
-   ArrayOperations< Devices::Cuda >::copyMemory< ElementType, ElementType >( deviceData2, deviceData, size );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ElementType, ElementType >( hostData2, deviceData2, size );
-   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< ElementType, ElementType >( hostData, hostData2, size) ) );
+   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 13, size );
+   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< ValueType >( deviceData, hostData, size );
+   ArrayOperations< Devices::Cuda >::copyMemory< ValueType, ValueType >( deviceData2, deviceData, size );
+   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ValueType, ValueType >( hostData2, deviceData2, size );
+   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< ValueType, ValueType >( hostData, hostData2, size) ) );
    ArrayOperations< Devices::Host >::freeMemory( hostData );
    ArrayOperations< Devices::Host >::freeMemory( hostData2 );
    ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
@@ -306,26 +306,26 @@ TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversions_cuda )
 
 TYPED_TEST( ArrayOperationsTest, compareMemory_cuda )
 {
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
-   ElementType *hostData, *deviceData, *deviceData2;
+   ValueType *hostData, *deviceData, *deviceData2;
    ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
 
-   ArrayOperations< Devices::Host >::setMemory( hostData, (ElementType) 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ElementType) 8, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ElementType) 9, size );
-   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ElementType, ElementType >( hostData, deviceData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ElementType, ElementType >( deviceData, hostData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ElementType, ElementType >( deviceData, deviceData2, size ) ));
-
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ElementType) 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ElementType) 7, size );
-   EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ElementType, ElementType >( hostData, deviceData, size ) ));
-   EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ElementType, ElementType >( deviceData, hostData, size ) ));
-   EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compareMemory< ElementType, ElementType >( deviceData, deviceData2, size ) ));
+   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size );
+   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) ));
+   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) ));
+   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) ));
+
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 7, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 7, size );
+   EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) ));
+   EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) ));
+   EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) ));
 
    ArrayOperations< Devices::Host >::freeMemory( hostData );
    ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index 403d551970d1cac1facbbfa00c4331d238da3827..e451c0416f5f14e63af1e2a4a3bb117159f16229 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -14,13 +14,14 @@
 #include <type_traits>
 
 #include <TNL/Containers/Array.h>
+#include <TNL/Containers/Vector.h>
 
 #include "gtest/gtest.h"
 
 using namespace TNL;
 using namespace TNL::Containers;
 
-// minimal custom data structure usable as ElementType in Array
+// minimal custom data structure usable as ValueType in Array
 struct MyData
 {
    double data;
@@ -78,11 +79,10 @@ using ArrayTypes = ::testing::Types<
    Array< long,   Devices::Host, long >,
    Array< float,  Devices::Host, long >,
    Array< double, Devices::Host, long >,
-   Array< MyData, Devices::Host, long >
+   Array< MyData, Devices::Host, long >,
    // FIXME: this segfaults in String::~String()
-//   , Array< String, Devices::Host, long >
+//   Array< String, Devices::Host, long >,
 #ifdef HAVE_CUDA
-   ,
    Array< short,  Devices::Cuda, short >,
    Array< int,    Devices::Cuda, short >,
    Array< long,   Devices::Cuda, short >,
@@ -100,10 +100,9 @@ using ArrayTypes = ::testing::Types<
    Array< long,   Devices::Cuda, long >,
    Array< float,  Devices::Cuda, long >,
    Array< double, Devices::Cuda, long >,
-   Array< MyData, Devices::Cuda, long >
+   Array< MyData, Devices::Cuda, long >,
 #endif
 #ifdef HAVE_MIC
-   ,
    Array< short,  Devices::MIC, short >,
    Array< int,    Devices::MIC, short >,
    Array< long,   Devices::MIC, short >,
@@ -122,9 +121,24 @@ using ArrayTypes = ::testing::Types<
    Array< int,    Devices::MIC, long >,
    Array< long,   Devices::MIC, long >,
    Array< float,  Devices::MIC, long >,
-   Array< double, Devices::MIC, long >
+   Array< double, Devices::MIC, long >,
    // TODO: MyData does not work on MIC
-//   Array< MyData, Devices::MIC, long >
+//   Array< MyData, Devices::MIC, long >,
+#endif
+
+   // all array tests should also work with Vector
+   // (but we can't test all types because the argument list would be too long...)
+   Vector< float,  Devices::Host, long >,
+   Vector< double, Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,
+   Vector< float,  Devices::Cuda, long >,
+   Vector< double, Devices::Cuda, long >
+#endif
+#ifdef HAVE_MIC
+   ,
+   Vector< float,  Devices::MIC, long >,
+   Vector< double, Devices::MIC, long >
 #endif
 >;
 
@@ -142,7 +156,7 @@ TYPED_TEST( ArrayTest, constructors )
    EXPECT_EQ( v.getSize(), 10 );
 
    if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
-      typename ArrayType::ElementType data[ 10 ];
+      typename ArrayType::ValueType data[ 10 ];
       ArrayType w( data, 10 );
       EXPECT_EQ( w.getData(), data );
 
@@ -225,7 +239,7 @@ TYPED_TEST( ArrayTest, bind )
    EXPECT_EQ( v.getElement( 0 ), 50 );
 
    if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
-      typename ArrayType::ElementType data[ 10 ] = { 1, 2, 3, 4, 5, 6, 7, 8, 10 };
+      typename ArrayType::ValueType data[ 10 ] = { 1, 2, 3, 4, 5, 6, 7, 8, 10 };
       u.bind( data, 10 );
       EXPECT_EQ( u.getData(), data );
       EXPECT_EQ( u.getSize(), 10 );
@@ -274,8 +288,8 @@ TYPED_TEST( ArrayTest, reset )
    EXPECT_EQ( u.getData(), nullptr );
 }
 
-template< typename Element, typename Index >
-void testArrayElementwiseAccess( Array< Element, Devices::Host, Index >&& u )
+template< typename Value, typename Index >
+void testArrayElementwiseAccess( Array< Value, Devices::Host, Index >&& u )
 {
    u.setSize( 10 );
    for( int i = 0; i < 10; i++ ) {
@@ -287,20 +301,20 @@ void testArrayElementwiseAccess( Array< Element, Devices::Host, Index >&& u )
 }
 
 #ifdef HAVE_CUDA
-template< typename ElementType, typename IndexType >
-__global__ void testSetGetElementKernel( Array< ElementType, Devices::Cuda, IndexType >* u )
+template< typename ValueType, typename IndexType >
+__global__ void testSetGetElementKernel( Array< ValueType, Devices::Cuda, IndexType >* u )
 {
    if( threadIdx.x < ( *u ).getSize() )
       ( *u )[ threadIdx.x ] = threadIdx.x;
 }
 #endif /* HAVE_CUDA */
 
-template< typename Element, typename Index >
-void testArrayElementwiseAccess( Array< Element, Devices::Cuda, Index >&& u )
+template< typename Value, typename Index >
+void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u )
 {
 #ifdef HAVE_CUDA
    u.setSize( 10 );
-   using ArrayType = Array< Element, Devices::Cuda, Index >;
+   using ArrayType = Array< Value, Devices::Cuda, Index >;
    ArrayType* kernel_u = Devices::Cuda::passToDevice( u );
    testSetGetElementKernel<<< 1, 16 >>>( kernel_u );
    Devices::Cuda::freeFromDevice( kernel_u );
@@ -311,8 +325,8 @@ void testArrayElementwiseAccess( Array< Element, Devices::Cuda, Index >&& u )
 #endif
 }
 
-template< typename Element, typename Index >
-void testArrayElementwiseAccess( Array< Element, Devices::MIC, Index >&& u )
+template< typename Value, typename Index >
+void testArrayElementwiseAccess( Array< Value, Devices::MIC, Index >&& u )
 {
 #ifdef HAVE_MIC
    // TODO
@@ -443,7 +457,7 @@ TYPED_TEST( ArrayTest, assignmentOperator )
 
 // test works only for arithmetic types
 template< typename ArrayType,
-          typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ElementType >::value >::type >
+          typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type >
 void testArrayAssignmentWithDifferentType()
 {
    ArrayType u( 10 );
@@ -471,7 +485,7 @@ void testArrayAssignmentWithDifferentType()
 }
 
 template< typename ArrayType,
-          typename = typename std::enable_if< ! std::is_arithmetic< typename ArrayType::ElementType >::value >::type,
+          typename = typename std::enable_if< ! std::is_arithmetic< typename ArrayType::ValueType >::value >::type,
           typename = void >
 void testArrayAssignmentWithDifferentType()
 {
@@ -547,7 +561,7 @@ TYPED_TEST( ArrayTest, referenceCountingConstructors )
 
    // copies of a static array
    if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
-      typename ArrayType::ElementType data[ 10 ] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+      typename ArrayType::ValueType data[ 10 ] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
       ArrayType u( data, 10 );
       ArrayType v( u );
       ArrayType w( v );
@@ -572,7 +586,7 @@ TYPED_TEST( ArrayTest, referenceCountingBind )
 
    // copies of a static array
    if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
-      typename ArrayType::ElementType data[ 10 ] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+      typename ArrayType::ValueType data[ 10 ] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
       ArrayType u( data, 10 );
       ArrayType v;
       v.bind( u );
diff --git a/src/UnitTests/Containers/ArrayViewTest.cpp b/src/UnitTests/Containers/ArrayViewTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0370cb807b7b7c3b0ccf84f686b22cc4f246e8ef
--- /dev/null
+++ b/src/UnitTests/Containers/ArrayViewTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          ArrayViewTest.cpp  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "ArrayViewTest.h"
diff --git a/src/UnitTests/Containers/ArrayViewTest.cu b/src/UnitTests/Containers/ArrayViewTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d1fde2417f1b7bbd50b8a5f1d3b83d5cca60d80c
--- /dev/null
+++ b/src/UnitTests/Containers/ArrayViewTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          ArrayViewTest.cu  -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "ArrayViewTest.h"
diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..7d37a3b78be2c5c006fc48ad9a943a6bd27ca39b
--- /dev/null
+++ b/src/UnitTests/Containers/ArrayViewTest.h
@@ -0,0 +1,509 @@
+/***************************************************************************
+                          ArrayViewTest.h -  description
+                             -------------------
+    begin                : Sep 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#ifdef HAVE_GTEST 
+#include <type_traits>
+
+#include <TNL/Containers/Array.h>
+#include <TNL/Containers/ArrayView.h>
+
+#include "gtest/gtest.h"
+
+using namespace TNL;
+using namespace TNL::Containers;
+
+// minimal custom data structure (a wrapper around one double) usable as ValueType in Array
+struct MyData
+{
+   double data;  // the wrapped value; both comparison operators below act on it
+
+   __cuda_callable__
+   MyData() : data(0) {}  // a default-constructed object holds zero
+
+   template< typename T >
+   __cuda_callable__
+   MyData( T v ) : data(v) {}  // non-explicit, so plain numbers used by the tests convert implicitly
+
+   __cuda_callable__
+   bool operator==( const MyData& v ) const { return data == v.data; }
+
+   // comparison with a raw value of any type; used in tests, not necessary for Array to work
+   template< typename T >
+   bool operator==( T v ) const { return data == v; }
+
+   static String getType()  // type name as a String; presumably queried by the containers - TODO confirm
+   {
+      return String( "MyData" );
+   }
+};
+
+std::ostream& operator<<( std::ostream& os, const MyData& item )
+{  // stream insertion: writes only the wrapped double and hands the stream back for chaining
+   return os << item.data;
+}
+
+
+// test fixture for typed tests: exposes the tested Array type and the ArrayView type matching it
+template< typename Array >
+class ArrayViewTest : public ::testing::Test
+{
+protected:
+   using ArrayType = Array;  // the owning container the views are created from
+   using ViewType = ArrayView< typename Array::ValueType, typename Array::DeviceType, typename Array::IndexType >;  // same value, device and index types as the array
+};
+
+// types for which ArrayViewTest is instantiated (host always; CUDA and MIC lists only when HAVE_CUDA / HAVE_MIC are defined)
+using ArrayTypes = ::testing::Types<
+   Array< short,  Devices::Host, short >,
+   Array< int,    Devices::Host, short >,
+   Array< long,   Devices::Host, short >,
+   Array< float,  Devices::Host, short >,
+   Array< double, Devices::Host, short >,
+   Array< MyData, Devices::Host, short >,
+   Array< short,  Devices::Host, int >,
+   Array< int,    Devices::Host, int >,
+   Array< long,   Devices::Host, int >,
+   Array< float,  Devices::Host, int >,
+   Array< double, Devices::Host, int >,
+   Array< MyData, Devices::Host, int >,
+   Array< short,  Devices::Host, long >,
+   Array< int,    Devices::Host, long >,
+   Array< long,   Devices::Host, long >,
+   Array< float,  Devices::Host, long >,
+   Array< double, Devices::Host, long >,
+   Array< MyData, Devices::Host, long >
+   // FIXME: this segfaults in String::~String(), so String is not tested as a value type
+//   , Array< String, Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,
+   Array< short,  Devices::Cuda, short >,
+   Array< int,    Devices::Cuda, short >,
+   Array< long,   Devices::Cuda, short >,
+   Array< float,  Devices::Cuda, short >,
+   Array< double, Devices::Cuda, short >,
+   Array< MyData, Devices::Cuda, short >,
+   Array< short,  Devices::Cuda, int >,
+   Array< int,    Devices::Cuda, int >,
+   Array< long,   Devices::Cuda, int >,
+   Array< float,  Devices::Cuda, int >,
+   Array< double, Devices::Cuda, int >,
+   Array< MyData, Devices::Cuda, int >,
+   Array< short,  Devices::Cuda, long >,
+   Array< int,    Devices::Cuda, long >,
+   Array< long,   Devices::Cuda, long >,
+   Array< float,  Devices::Cuda, long >,
+   Array< double, Devices::Cuda, long >,
+   Array< MyData, Devices::Cuda, long >
+#endif
+#ifdef HAVE_MIC
+   ,
+   Array< short,  Devices::MIC, short >,
+   Array< int,    Devices::MIC, short >,
+   Array< long,   Devices::MIC, short >,
+   Array< float,  Devices::MIC, short >,
+   Array< double, Devices::MIC, short >,
+   // TODO: MyData does not work on MIC
+//   Array< MyData, Devices::MIC, short >,
+   Array< short,  Devices::MIC, int >,
+   Array< int,    Devices::MIC, int >,
+   Array< long,   Devices::MIC, int >,
+   Array< float,  Devices::MIC, int >,
+   Array< double, Devices::MIC, int >,
+   // TODO: MyData does not work on MIC
+//   Array< MyData, Devices::MIC, int >,
+   Array< short,  Devices::MIC, long >,
+   Array< int,    Devices::MIC, long >,
+   Array< long,   Devices::MIC, long >,
+   Array< float,  Devices::MIC, long >,
+   Array< double, Devices::MIC, long >
+   // TODO: MyData does not work on MIC
+//   Array< MyData, Devices::MIC, long >
+#endif
+>;
+
+TYPED_TEST_CASE( ArrayViewTest, ArrayTypes );  // every TYPED_TEST below runs once per type in ArrayTypes
+
+
+TYPED_TEST( ArrayViewTest, constructors )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+   using ConstViewType = ArrayView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >;
+   // Checks the ways a view can be constructed: from an array, from a raw pointer, from another view, and as a const view.
+   ArrayType a( 10 );
+   EXPECT_EQ( a.getSize(), 10 );
+   // a view constructed from an array must report the array's size and data pointer
+   ViewType v( a );
+   EXPECT_EQ( v.getSize(), 10 );
+   EXPECT_EQ( v.getData(), a.getData() );
+   // raw-pointer and copy construction; executed only for host arrays, the buffer below is a local array
+   if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
+      typename ArrayType::ValueType data[ 10 ];  // left uninitialized: only its address is checked
+      ViewType w( data, 10 );
+      EXPECT_EQ( w.getData(), data );
+
+      ViewType z( w );
+      EXPECT_EQ( z.getData(), data );
+      EXPECT_EQ( z.getSize(), 10 );
+   }
+
+   // test initialization by const reference (these lines only have to compile, nothing is asserted)
+   const ArrayType& b = a;
+   ConstViewType b_view( b );
+   ConstViewType const_a_view( a );
+   // NOTE(review): b_view is itself a ConstViewType, so this copies a const view; pass `v` if non-const -> const was intended
+   // test initialization of const view by non-const view
+   ConstViewType const_b_view( b_view );
+}
+
+TYPED_TEST( ArrayViewTest, bind )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+   // A default-constructed view bound to an array must report the array's size and data pointer.
+   ArrayType a( 10 );
+   ViewType v;
+   v.bind( a );
+   EXPECT_EQ( v.getSize(), a.getSize() );
+   EXPECT_EQ( v.getData(), a.getData() );
+
+   // setting values: a write through the view must be visible through the array
+   a.setValue( 27 );
+   EXPECT_EQ( a.getElement( 0 ), 27 );
+   v.setValue( 50 );
+   EXPECT_EQ( a.getElement( 0 ), 50 );
+   a.reset();  // resetting the array must not change the size recorded in the view
+   EXPECT_EQ( a.getSize(), 0 );
+   EXPECT_EQ( v.getSize(), 10 );
+   // binding to a caller-owned buffer; executed only for host arrays, the buffer is a local array
+   if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
+      typename ArrayType::ValueType data[ 10 ] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };  // full sequence 1..10 (9 was missing)
+      a.bind( data, 10 );
+      EXPECT_EQ( a.getData(), data );
+      EXPECT_EQ( a.getSize(), 10 );
+      EXPECT_EQ( a.getElement( 1 ), 2 );
+      v.bind( a );
+      EXPECT_EQ( v.getElement( 1 ), 2 );
+      a.reset();  // the view must keep referring to `data` after the array lets go of it
+      v.setElement( 1, 3 );
+      v.reset();  // the write made through the view must survive resetting the view
+      EXPECT_EQ( data[ 1 ], 3 );
+   }
+}
+
+TYPED_TEST( ArrayViewTest, swap )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+   // Two arrays with different sizes and fill values, so the views are distinguishable after the swap.
+   ArrayType a( 10 ), b( 20 );
+   a.setValue( 0 );
+   b.setValue( 1 );
+   // after u.swap( v ), u must show b's size and contents and v must show a's
+   ViewType u( a ), v( b );
+   u.swap( v );
+   EXPECT_EQ( u.getSize(), 20 );
+   EXPECT_EQ( v.getSize(), 10 );
+   for( int i = 0; i < 20; i++ )
+      EXPECT_EQ( u.getElement( i ), 1 );
+   for( int i = 0; i < 10; i++ )
+      EXPECT_EQ( v.getElement( i ), 0 );
+}
+
+TYPED_TEST( ArrayViewTest, reset )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+   // reset() must leave the view empty (size 0, null data pointer), and the view must be usable again after bind().
+   ArrayType a;
+   a.setSize( 100 );
+   ViewType u( a );
+   EXPECT_EQ( u.getSize(), 100 );
+   EXPECT_NE( u.getData(), nullptr );
+   u.reset();
+   EXPECT_EQ( u.getSize(), 0 );
+   EXPECT_EQ( u.getData(), nullptr );
+   u.bind( a );  // re-bind the emptied view and repeat the same checks
+   EXPECT_EQ( u.getSize(), 100 );
+   EXPECT_NE( u.getData(), nullptr );
+   u.reset();
+   EXPECT_EQ( u.getSize(), 0 );
+   EXPECT_EQ( u.getData(), nullptr );
+}
+
+template< typename Value, typename Index >
+void testArrayViewElementwiseAccess( Array< Value, Devices::Host, Index >&& a )
+{
+   a.setSize( 10 );  // host overload: the array passed in is sized here before a view is taken
+   using ViewType = ArrayView< Value, Devices::Host, Index >;
+   ViewType u( a );
+   for( int i = 0; i < 10; i++ ) {
+      u.setElement( i, i );  // write through the view, then read back in three ways
+      EXPECT_EQ( u.getData()[ i ], i );  // via the raw data pointer
+      EXPECT_EQ( u.getElement( i ), i );  // via getElement()
+      EXPECT_EQ( u[ i ], i );  // via operator[]
+   }
+}
+
+#ifdef HAVE_CUDA
+template< typename ValueType, typename IndexType >
+__global__ void testSetGetElementKernel( ArrayView< ValueType, Devices::Cuda, IndexType > v )  // the view is taken by value
+{
+   if( threadIdx.x < v.getSize() )  // threads with an index beyond the view's size do nothing
+      v[ threadIdx.x ] = threadIdx.x;  // element i receives the value i
+}
+#endif /* HAVE_CUDA */
+
+template< typename Value, typename Index >
+void testArrayViewElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u )
+{
+#ifdef HAVE_CUDA
+   // CUDA overload: elements are written by a kernel through a view and read back through the owning array.
+   u.setSize( 10 );
+   using ViewType = ArrayView< Value, Devices::Cuda, Index >;
+   ViewType v( u );
+   testSetGetElementKernel<<< 1, 16 >>>( v );  // 16 threads for 10 elements; the kernel guards against the surplus
+   TNL_CHECK_CUDA_DEVICE;
+   for( int i = 0; i < 10; i++ ) {
+      EXPECT_EQ( u.getElement( i ), i );
+   }
+#endif
+}
+
+template< typename Value, typename Index >
+void testArrayViewElementwiseAccess( Array< Value, Devices::MIC, Index >&& u )
+{
+#ifdef HAVE_MIC
+   // TODO: element-wise access is not tested on MIC yet; this overload is an empty placeholder
+#endif
+}
+
+TYPED_TEST( ArrayViewTest, elementwiseAccess )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   // overload resolution on the array's device type selects the Host, Cuda or MIC helper
+   testArrayViewElementwiseAccess( ArrayType() );
+}
+
+TYPED_TEST( ArrayViewTest, containsValue )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+
+   ArrayType a;
+   a.setSize( 1024 );
+   ViewType v( a );
+   // fill the view with the repeating pattern 0, 1, ..., 9
+   for( int i = 0; i < v.getSize(); i++ )
+      v.setElement( i, i % 10 );
+   // every value that occurs in the pattern must be found
+   for( int i = 0; i < 10; i++ )
+      EXPECT_TRUE( v.containsValue( i ) );
+   // values that do not occur in the pattern must not be found
+   for( int i = 10; i < 20; i++ )
+      EXPECT_FALSE( v.containsValue( i ) );
+}
+
+TYPED_TEST( ArrayViewTest, containsOnlyValue )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+
+   ArrayType a;
+   a.setSize( 1024 );
+   ViewType v( a );
+   // fill the view with the repeating pattern 0, 1, ..., 9, so that no single value fills it
+   for( int i = 0; i < v.getSize(); i++ )
+      v.setElement( i, i % 10 );
+   // neither the values present in the mixed pattern (0..9) nor absent ones (10..19) may be reported
+   for( int i = 0; i < 20; i++ )
+      EXPECT_FALSE( v.containsOnlyValue( i ) );
+   // once the array is filled uniformly, the view must report that single value
+   a.setValue( 100 );
+   EXPECT_TRUE( v.containsOnlyValue( 100 ) );
+}
+
+TYPED_TEST( ArrayViewTest, comparisonOperator )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+   // a and a_host hold 0, 1, ..., 9; b holds 0, 2, ..., 18 and therefore differs from a
+   ArrayType a( 10 ), b( 10 );
+   typename ArrayType::HostType a_host( 10 );
+   for( int i = 0; i < 10; i ++ ) {
+      a.setElement( i, i );
+      a_host.setElement( i, i );
+      b.setElement( i, 2 * i );
+   }
+
+   ViewType u( a ), v( a ), w( b );  // u and v view the same array, w views the different one
+   // comparison between views, in both operand orders
+   EXPECT_TRUE( u == u );
+   EXPECT_TRUE( u == v );
+   EXPECT_TRUE( v == u );
+   EXPECT_FALSE( u != v );
+   EXPECT_FALSE( v != u );
+   EXPECT_TRUE( u != w );
+   EXPECT_TRUE( w != u );
+   EXPECT_FALSE( u == w );
+   EXPECT_FALSE( w == u );
+
+   // comparison of views with arrays
+   EXPECT_TRUE( a == u );
+   EXPECT_FALSE( a != u );
+   EXPECT_TRUE( u == a );
+   EXPECT_FALSE( u != a );
+   EXPECT_TRUE( a != w );
+   EXPECT_FALSE( a == w );
+
+   // comparison with different device (a_host is an ArrayType::HostType array)
+   EXPECT_TRUE( u == a_host );
+   EXPECT_TRUE( a_host == u );
+   // FIXME: what operator is called without explicit retyping? Until resolved, a_host is cast to a host view below.
+//   EXPECT_TRUE( w != a_host );
+   EXPECT_TRUE( w != (ArrayView< typename ArrayType::ValueType, Devices::Host, typename ArrayType::IndexType >) a_host );
+   EXPECT_TRUE( a_host != w );
+   // an empty view must differ from a non-empty one, and two empty views must be equal
+   v.reset();
+   EXPECT_FALSE( u == v );
+   u.reset();
+   EXPECT_TRUE( u == v );
+}
+
+TYPED_TEST( ArrayViewTest, comparisonOperatorWithDifferentType )
+{
+   using DeviceType = typename TestFixture::ArrayType::DeviceType;
+   using ArrayType1 = Array< short, DeviceType >;
+   using ArrayType2 = Array< float, Devices::Host >;
+   using ViewType1 = ArrayView< short, DeviceType >;
+   using ViewType2 = ArrayView< float, Devices::Host >;
+   // Only the device type comes from the fixture: a short view on that device is compared with a float view on the host.
+   ArrayType1 a( 10 );
+   ArrayType2 b( 10 );
+   for( int i = 0; i < 10; i++ ) {
+      a.setElement( i, i );
+      b.setElement( i, i );
+   }
+
+   ViewType1 u( a );
+   ViewType2 v( b );
+   // equal contents (0..9) must compare equal despite the different value types
+   EXPECT_TRUE( u == v );
+   EXPECT_FALSE( u != v );
+
+   // the comparison will be in floats: 0.1f must not compare equal to the short value 0
+   v.setElement( 0, 0.1f );
+   EXPECT_FALSE( u == v );
+   EXPECT_TRUE( u != v );
+}
+
+TYPED_TEST( ArrayViewTest, assignmentOperator )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using ViewType = typename TestFixture::ViewType;
+   // Assignment must copy the elements and leave the target bound to its own buffer (its data pointer is checked each time).
+   ArrayType a( 10 ), b( 10 );
+   typename ArrayType::HostType a_host( 10 );
+   for( int i = 0; i < 10; i++ ) {
+      a.setElement( i, i );
+      a_host.setElement( i, i );
+   }
+
+   ViewType u( a ), v( b );
+   typename ViewType::HostType u_host( a_host );
+   // assignment between views of the same type; the target is zeroed first so that the copy is observable
+   v.setValue( 0 );
+   v = u;
+   EXPECT_EQ( u, v );
+   EXPECT_EQ( v.getData(), b.getData() );
+
+   // assignment from host to device
+   v.setValue( 0 );
+   v = u_host;
+   EXPECT_EQ( u, v );
+   EXPECT_EQ( v.getData(), b.getData() );
+
+   // assignment from device to host
+   u_host.setValue( 0 );
+   u_host = u;
+   EXPECT_EQ( u_host, u );
+   EXPECT_EQ( u_host.getData(), a_host.getData() );
+}
+
+// test works only for arithmetic types: this overload is enabled (via enable_if) only when ArrayType::ValueType is arithmetic
+template< typename ArrayType,
+          typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type >
+void testArrayAssignmentWithDifferentType()
+{
+   ArrayType a( 10 );
+   Array< short, typename ArrayType::DeviceType, short > b( 10 );
+   Array< short, Devices::Host, short > b_host( 10 );
+   typename ArrayType::HostType a_host( 10 );
+   for( int i = 0; i < 10; i++ ) {
+      a.setElement( i, i );
+      a_host.setElement( i, i );
+   }
+   // u and u_host view the source data (0..9); v and v_host are views of short with short index that receive it
+   using ViewType = ArrayView< typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >;
+   ViewType u( a );
+   typename ViewType::HostType u_host( a_host );
+   using ShortViewType = ArrayView< short, typename ArrayType::DeviceType, short >;
+   ShortViewType v( b );
+   typename ShortViewType::HostType v_host( b_host );
+   // assignment between views on the same device; the target is zeroed first so that the copy is observable
+   v.setValue( 0 );
+   v = u;
+   EXPECT_EQ( v, u );
+   EXPECT_EQ( v.getData(), b.getData() );
+
+   // assignment from host to device
+   v.setValue( 0 );
+   v = u_host;
+   EXPECT_EQ( v, u_host );
+   EXPECT_EQ( v.getData(), b.getData() );
+
+   // assignment from device to host
+   v_host.setValue( 0 );
+   v_host = u;
+   EXPECT_EQ( v_host, u );
+   EXPECT_EQ( v_host.getData(), b_host.getData() );
+}
+
+template< typename ArrayType,
+          typename = typename std::enable_if< ! std::is_arithmetic< typename ArrayType::ValueType >::value >::type,
+          typename = void >  // extra defaulted parameter keeps this template distinct from the arithmetic overload
+void testArrayAssignmentWithDifferentType()
+{  // non-arithmetic value types: nothing is tested
+}
+
+TYPED_TEST( ArrayViewTest, assignmentOperatorWithDifferentType )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   // resolves to the real test for arithmetic value types and to the empty overload otherwise
+   testArrayAssignmentWithDifferentType< ArrayType >();
+}
+
+// TODO: test all __cuda_callable__ methods from a CUDA kernel
+
+#endif // HAVE_GTEST
+
+
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )
+{
+#ifndef HAVE_GTEST
+   throw GtestMissingError();  // built without GoogleTest: there is nothing to run
+#else
+   ::testing::InitGoogleTest( &argc, argv );  // hands the command-line arguments to GoogleTest
+   return RUN_ALL_TESTS();  // the result of the test run becomes the exit code
+#endif
+}
diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
index 58832b04d4108ede4165995ac9bb818b806afe7d..3fc3755814aea28bec83143bdb55d5e0a5879b23 100644
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -24,12 +24,24 @@ IF( BUILD_CUDA )
    TARGET_LINK_LIBRARIES( ArrayTest
                               ${GTEST_BOTH_LIBRARIES}
                               tnl )
+
+   CUDA_ADD_EXECUTABLE( ArrayViewTest ArrayViewTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ArrayViewTest
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl )
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( ArrayTest ArrayTest.cpp )
    TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( ArrayTest
                               ${GTEST_BOTH_LIBRARIES}
                               tnl )
+
+   ADD_EXECUTABLE( ArrayViewTest ArrayViewTest.cpp )
+   TARGET_COMPILE_OPTIONS( ArrayViewTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ArrayViewTest
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl )
 ENDIF( BUILD_CUDA )
 
 ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp )
@@ -38,7 +50,8 @@ TARGET_LINK_LIBRARIES( StaticArrayTest
                            ${GTEST_BOTH_LIBRARIES}
                            tnl )
 
-# NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time
+# NOTE: Vector = Array + VectorOperations, VectorView = ArrayView + VectorOperations,
+# so we test Vector, VectorView and VectorOperations at the same time
 IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( VectorTest VectorTest.cu
                         OPTIONS ${CXX_TESTS_FLAGS} )
@@ -77,6 +90,7 @@ TARGET_LINK_LIBRARIES( StaticVectorTest
 ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ArrayOperationsTest ${EXECUTABLE_OUTPUT_PATH}/ArrayOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Containers/ListTest.cpp b/src/UnitTests/Containers/ListTest.cpp
index ec81c1f0bf8f3d25be8d0daf98e2625cdc65ab69..d326f98f41847896544148add8dfa8a2b1afab21 100644
--- a/src/UnitTests/Containers/ListTest.cpp
+++ b/src/UnitTests/Containers/ListTest.cpp
@@ -59,7 +59,7 @@ TYPED_TEST( ListTest, constructor )
 TYPED_TEST( ListTest, operations )
 {
    using ListType = typename TestFixture::ListType;
-   using ElementType = typename ListType::ElementType;
+   using ValueType = typename ListType::ValueType;
 
    ListType a, b;
 
@@ -68,10 +68,10 @@ TYPED_TEST( ListTest, operations )
    a.Prepend( 2 );
    a.Insert( 3, 1 );
    EXPECT_EQ( a.getSize(), 4 );
-   EXPECT_EQ( a[ 0 ], (ElementType) 2 );
-   EXPECT_EQ( a[ 1 ], (ElementType) 3 );
-   EXPECT_EQ( a[ 2 ], (ElementType) 0 );
-   EXPECT_EQ( a[ 3 ], (ElementType) 1 );
+   EXPECT_EQ( a[ 0 ], (ValueType) 2 );
+   EXPECT_EQ( a[ 1 ], (ValueType) 3 );
+   EXPECT_EQ( a[ 2 ], (ValueType) 0 );
+   EXPECT_EQ( a[ 3 ], (ValueType) 1 );
 
    b = a;
    EXPECT_EQ( b.getSize(), 4 );
@@ -79,36 +79,36 @@ TYPED_TEST( ListTest, operations )
 
    b.Insert( 4, 4 );
    EXPECT_NE( a, b );
-   EXPECT_EQ( b[ 4 ], (ElementType) 4 );
+   EXPECT_EQ( b[ 4 ], (ValueType) 4 );
 
    a.AppendList( b );
    EXPECT_EQ( a.getSize(), 9 );
-   EXPECT_EQ( a[ 0 ], (ElementType) 2 );
-   EXPECT_EQ( a[ 1 ], (ElementType) 3 );
-   EXPECT_EQ( a[ 2 ], (ElementType) 0 );
-   EXPECT_EQ( a[ 3 ], (ElementType) 1 );
-   EXPECT_EQ( a[ 4 ], (ElementType) 2 );
-   EXPECT_EQ( a[ 5 ], (ElementType) 3 );
-   EXPECT_EQ( a[ 6 ], (ElementType) 0 );
-   EXPECT_EQ( a[ 7 ], (ElementType) 1 );
-   EXPECT_EQ( a[ 8 ], (ElementType) 4 );
+   EXPECT_EQ( a[ 0 ], (ValueType) 2 );
+   EXPECT_EQ( a[ 1 ], (ValueType) 3 );
+   EXPECT_EQ( a[ 2 ], (ValueType) 0 );
+   EXPECT_EQ( a[ 3 ], (ValueType) 1 );
+   EXPECT_EQ( a[ 4 ], (ValueType) 2 );
+   EXPECT_EQ( a[ 5 ], (ValueType) 3 );
+   EXPECT_EQ( a[ 6 ], (ValueType) 0 );
+   EXPECT_EQ( a[ 7 ], (ValueType) 1 );
+   EXPECT_EQ( a[ 8 ], (ValueType) 4 );
 
    a.PrependList( b );
    EXPECT_EQ( a.getSize(), 14 );
-   EXPECT_EQ( a[ 0 ],  (ElementType) 2 );
-   EXPECT_EQ( a[ 1 ],  (ElementType) 3 );
-   EXPECT_EQ( a[ 2 ],  (ElementType) 0 );
-   EXPECT_EQ( a[ 3 ],  (ElementType) 1 );
-   EXPECT_EQ( a[ 4 ],  (ElementType) 4 );
-   EXPECT_EQ( a[ 5 ],  (ElementType) 2 );
-   EXPECT_EQ( a[ 6 ],  (ElementType) 3 );
-   EXPECT_EQ( a[ 7 ],  (ElementType) 0 );
-   EXPECT_EQ( a[ 8 ],  (ElementType) 1 );
-   EXPECT_EQ( a[ 9 ],  (ElementType) 2 );
-   EXPECT_EQ( a[ 10 ], (ElementType) 3 );
-   EXPECT_EQ( a[ 11 ], (ElementType) 0 );
-   EXPECT_EQ( a[ 12 ], (ElementType) 1 );
-   EXPECT_EQ( a[ 13 ], (ElementType) 4 );
+   EXPECT_EQ( a[ 0 ],  (ValueType) 2 );
+   EXPECT_EQ( a[ 1 ],  (ValueType) 3 );
+   EXPECT_EQ( a[ 2 ],  (ValueType) 0 );
+   EXPECT_EQ( a[ 3 ],  (ValueType) 1 );
+   EXPECT_EQ( a[ 4 ],  (ValueType) 4 );
+   EXPECT_EQ( a[ 5 ],  (ValueType) 2 );
+   EXPECT_EQ( a[ 6 ],  (ValueType) 3 );
+   EXPECT_EQ( a[ 7 ],  (ValueType) 0 );
+   EXPECT_EQ( a[ 8 ],  (ValueType) 1 );
+   EXPECT_EQ( a[ 9 ],  (ValueType) 2 );
+   EXPECT_EQ( a[ 10 ], (ValueType) 3 );
+   EXPECT_EQ( a[ 11 ], (ValueType) 0 );
+   EXPECT_EQ( a[ 12 ], (ValueType) 1 );
+   EXPECT_EQ( a[ 13 ], (ValueType) 4 );
 }
 #endif
 
diff --git a/src/UnitTests/Containers/MultiArrayTest.h b/src/UnitTests/Containers/MultiArrayTest.h
index c1a506c26a2d451de59819c70cc3faf7c09552ab..66fc9dc08574d717409718c1ab7a97d94828941f 100644
--- a/src/UnitTests/Containers/MultiArrayTest.h
+++ b/src/UnitTests/Containers/MultiArrayTest.h
@@ -20,23 +20,23 @@ using namespace TNL;
 using namespace TNL::Containers;
 
 #ifdef HAVE_CUDA
-template< typename ElementType, typename IndexType >
-__global__ void testSetGetElementKernel( MultiArray< 1, ElementType, Devices::Cuda, IndexType >* u )
+template< typename ValueType, typename IndexType >
+__global__ void testSetGetElementKernel( MultiArray< 1, ValueType, Devices::Cuda, IndexType >* u )
 {
    if( threadIdx.x < ( *u ).getDimensions().x() )
       ( *u )( threadIdx.x ) = threadIdx.x;
 }
 
-template< typename ElementType, typename IndexType >
-__global__ void testSetGetElementKernel( MultiArray< 2, ElementType, Devices::Cuda, IndexType >* u )
+template< typename ValueType, typename IndexType >
+__global__ void testSetGetElementKernel( MultiArray< 2, ValueType, Devices::Cuda, IndexType >* u )
 {
    if( threadIdx.x < ( *u ).getDimensions().x() &&
        threadIdx.x < ( *u ).getDimensions().y() )
       ( *u )( threadIdx.x, threadIdx.x ) = threadIdx.x;
 }
 
-template< typename ElementType, typename IndexType >
-__global__ void testSetGetElementKernel( MultiArray< 3, ElementType, Devices::Cuda, IndexType >* u )
+template< typename ValueType, typename IndexType >
+__global__ void testSetGetElementKernel( MultiArray< 3, ValueType, Devices::Cuda, IndexType >* u )
 {
    if( threadIdx.x < ( *u ).getDimensions().x() &&
        threadIdx.x < ( *u ).getDimensions().y() &&
@@ -51,51 +51,51 @@ __global__ void testSetGetElementKernel( MultiArray< 3, ElementType, Devices::Cu
 TEST( MultiArrayTest, testConstructorDestructor )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u;
+   MultiArray< Dimension, ValueType, Device, IndexType > u;
 }
 
 TEST( MultiArrayTest, testSetSize )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u, v;
+   MultiArray< Dimension, ValueType, Device, IndexType > u, v;
    u. setDimensions( 10 );
    v. setDimensions( 10 );
 }
 
-void setDiagonalElement( Containers::MultiArray< 1, ElementType, Device, IndexType >& u,
+void setDiagonalElement( Containers::MultiArray< 1, ValueType, Device, IndexType >& u,
                          const IndexType& i,
-                         const ElementType& v )
+                         const ValueType& v )
 {
    u.setElement( i, v );
 }
 
-void setDiagonalElement( Containers::MultiArray< 2, ElementType, Device, IndexType >& u,
+void setDiagonalElement( Containers::MultiArray< 2, ValueType, Device, IndexType >& u,
                          const IndexType& i,
-                         const ElementType& v )
+                         const ValueType& v )
 {
    u.setElement( i, i, v );
 }
 
-void setDiagonalElement( Containers::MultiArray< 3, ElementType, Device, IndexType >& u,
+void setDiagonalElement( Containers::MultiArray< 3, ValueType, Device, IndexType >& u,
                          const IndexType& i,
-                         const ElementType& v )
+                         const ValueType& v )
 {
    u.setElement( i, i, i, v );
 }
 
-IndexType getDiagonalElement( Containers::MultiArray< 1, ElementType, Device, IndexType >& u,
+IndexType getDiagonalElement( Containers::MultiArray< 1, ValueType, Device, IndexType >& u,
                               const IndexType& i )
 {
    return u.getElement( i );
 }
 
-IndexType getDiagonalElement( Containers::MultiArray< 2, ElementType, Device, IndexType >& u,
+IndexType getDiagonalElement( Containers::MultiArray< 2, ValueType, Device, IndexType >& u,
                               const IndexType& i )
 {
    return u.getElement( i, i );
 }
 
-IndexType getDiagonalElement( Containers::MultiArray< 3, ElementType, Device, IndexType >& u,
+IndexType getDiagonalElement( Containers::MultiArray< 3, ValueType, Device, IndexType >& u,
                               const IndexType& i )
 {
    return u.getElement( i, i, i );
@@ -105,7 +105,7 @@ IndexType getDiagonalElement( Containers::MultiArray< 3, ElementType, Device, In
 TEST( MultiArrayTest, testSetGetElement )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u;
+   MultiArray< Dimension, ValueType, Device, IndexType > u;
    u. setDimensions( 10 );
    if( std::is_same< Device, Devices::Host >::value )
    {
@@ -115,7 +115,7 @@ TEST( MultiArrayTest, testSetGetElement )
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      MultiArray< Dimension, ElementType, Device, IndexType >* kernel_u =
+      MultiArray< Dimension, ValueType, Device, IndexType >* kernel_u =
                Devices::Cuda::passToDevice( u );
       testSetGetElementKernel<<< 1, 16 >>>( kernel_u );
       Devices::Cuda::freeFromDevice( kernel_u );
@@ -129,7 +129,7 @@ TEST( MultiArrayTest, testSetGetElement )
 TEST( MultiArrayTest, testComparisonOperator )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u, v, w;
+   MultiArray< Dimension, ValueType, Device, IndexType > u, v, w;
    u.setDimensions( 10 );
    v.setDimensions( 10 );
    w.setDimensions( 10 );
@@ -151,8 +151,8 @@ TEST( MultiArrayTest, testComparisonOperator )
 TEST( MultiArrayTest, testEquivalenceOperator )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u;
-   MultiArray< Dimension, ElementType, Device, IndexType > v;
+   MultiArray< Dimension, ValueType, Device, IndexType > u;
+   MultiArray< Dimension, ValueType, Device, IndexType > v;
    u. setDimensions( 10 );
    v. setDimensions( 10 );
    for( int i = 0; i < 10; i ++ )
@@ -165,7 +165,7 @@ TEST( MultiArrayTest, testEquivalenceOperator )
 TEST( MultiArrayTest, testGetSize )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u;
+   MultiArray< Dimension, ValueType, Device, IndexType > u;
    const int maxSize = 10;
    for( int i = 1; i < maxSize; i ++ )
       u. setDimensions( i );
@@ -176,7 +176,7 @@ TEST( MultiArrayTest, testGetSize )
 TEST( MultiArrayTest, testReset )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > u;
+   MultiArray< Dimension, ValueType, Device, IndexType > u;
    u.setDimensions( 100 );
    ASSERT_EQ( u. getDimensions().x(), 100 );
    u.reset();
@@ -193,7 +193,7 @@ TEST( MultiArrayTest, testSetSizeAndDestructor )
    using namespace TNL::Containers;
    for( int i = 1; i < 100; i ++ )
    {
-      MultiArray< Dimension, ElementType, Device, IndexType > u;
+      MultiArray< Dimension, ValueType, Device, IndexType > u;
       u. setDimensions( i );
    }
 }
@@ -201,7 +201,7 @@ TEST( MultiArrayTest, testSetSizeAndDestructor )
 TEST( MultiArrayTest, testSaveAndLoad )
 {
    using namespace TNL::Containers;
-   MultiArray< Dimension, ElementType, Device, IndexType > v;
+   MultiArray< Dimension, ValueType, Device, IndexType > v;
    const int size( 10 );
    ASSERT_TRUE( v. setDimensions( size ) );
    for( int i = 0; i < size; i ++ )
@@ -210,7 +210,7 @@ TEST( MultiArrayTest, testSaveAndLoad )
    file. open( "test-file.tnl", IOMode::write );
    ASSERT_TRUE( v. save( file ) );
    file. close();
-   MultiArray< Dimension, ElementType, Device, IndexType > u;
+   MultiArray< Dimension, ValueType, Device, IndexType > u;
    file. open( "test-file.tnl", IOMode::read );
    ASSERT_TRUE( u. load( file ) );
    file. close();
diff --git a/src/UnitTests/Containers/StaticArrayTest.cpp b/src/UnitTests/Containers/StaticArrayTest.cpp
index dfe99c5876ccc855d8ab62464f59456490e403e2..e3d0b1f63e28864b0980d71af6623b51224c1218 100644
--- a/src/UnitTests/Containers/StaticArrayTest.cpp
+++ b/src/UnitTests/Containers/StaticArrayTest.cpp
@@ -24,7 +24,7 @@ class StaticArrayTest : public ::testing::Test
 {
 protected:
    using ArrayType = Array;
-   using ElementType = typename Array::ElementType;
+   using ValueType = typename Array::ValueType;
 };
 
 // types for which ArrayTest is instantiated
@@ -61,10 +61,10 @@ TYPED_TEST_CASE( StaticArrayTest, StaticArrayTypes );
 TYPED_TEST( StaticArrayTest, constructors )
 {
    using ArrayType = typename TestFixture::ArrayType;
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    constexpr int Size = ArrayType::size;
 
-   ElementType data[ Size ];
+   ValueType data[ Size ];
    for( int i = 0; i < Size; i++ )
       data[ i ] = i;
 
@@ -110,16 +110,16 @@ TYPED_TEST( StaticArrayTest, getData )
    EXPECT_TRUE( u2.getData() );
 }
 
-template< typename Element >
-void checkCoordinates( StaticArray< 1, Element >& u )
+template< typename Value >
+void checkCoordinates( StaticArray< 1, Value >& u )
 {
    EXPECT_EQ( u.x(), 0 );
    u.x() += 1;
    EXPECT_EQ( u.x(), 1 );
 }
 
-template< typename Element >
-void checkCoordinates( StaticArray< 2, Element >& u )
+template< typename Value >
+void checkCoordinates( StaticArray< 2, Value >& u )
 {
    EXPECT_EQ( u.x(), 0 );
    EXPECT_EQ( u.y(), 1 );
@@ -129,8 +129,8 @@ void checkCoordinates( StaticArray< 2, Element >& u )
    EXPECT_EQ( u.y(), 2 );
 }
 
-template< typename Element >
-void checkCoordinates( StaticArray< 3, Element >& u )
+template< typename Value >
+void checkCoordinates( StaticArray< 3, Value >& u )
 {
    EXPECT_EQ( u.x(), 0 );
    EXPECT_EQ( u.y(), 1 );
@@ -143,8 +143,8 @@ void checkCoordinates( StaticArray< 3, Element >& u )
    EXPECT_EQ( u.z(), 3 );
 }
 
-template< int _Size, typename Element >
-void checkCoordinates( StaticArray< _Size, Element >& u )
+template< int _Size, typename Value >
+void checkCoordinates( StaticArray< _Size, Value >& u )
 {
 }
 
@@ -284,14 +284,14 @@ TYPED_TEST( StaticArrayTest, streamOperator )
 TYPED_TEST( StaticArrayTest, BindToArray )
 {
    using ArrayType = typename TestFixture::ArrayType;
-   using ElementType = typename TestFixture::ElementType;
+   using ValueType = typename TestFixture::ValueType;
    constexpr int Size = ArrayType::size;
 
    ArrayType a;
    for( int i = 0; i < Size; i++ )
       a[ i ] = i+1;
 
-   Array< ElementType, Devices::Host > sharedArray;
+   Array< ValueType, Devices::Host > sharedArray;
    sharedArray.bind( a );
    for( int i = 0; i < Size; i++ )
       EXPECT_EQ( a[ i ], sharedArray[ i ] );
diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h
index beb4243f391f9c11bbdbe7de7c5187a423a20b32..d2cf8217f80fbe4503eed73f62ab81bd3970fe6a 100644
--- a/src/UnitTests/Containers/VectorTest.h
+++ b/src/UnitTests/Containers/VectorTest.h
@@ -17,8 +17,7 @@
 
 #include <TNL/Experimental/Arithmetics/Quad.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/File.h>
-#include <TNL/Math.h>
+#include <TNL/Containers/VectorView.h>
 
 #include "gtest/gtest.h"
 
@@ -81,6 +80,7 @@ class VectorTest : public ::testing::Test
 protected:
    using VectorType = Vector;
    using VectorOperations = Algorithms::VectorOperations< typename VectorType::DeviceType >;
+   using ViewType = VectorView< typename Vector::RealType, typename Vector::DeviceType, typename Vector::IndexType >;
 };
 
 // types for which VectorTest is instantiated
@@ -149,13 +149,16 @@ TYPED_TEST( VectorTest, max )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
    setLinearSequence( v );
 
    EXPECT_EQ( v.max(), size - 1 );
+   EXPECT_EQ( v_view.max(), size - 1 );
    EXPECT_EQ( VectorOperations::getVectorMax( v ), size - 1 );
 }
 
@@ -163,13 +166,16 @@ TYPED_TEST( VectorTest, min )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
    setLinearSequence( v );
 
    EXPECT_EQ( v.min(), 0 );
+   EXPECT_EQ( v_view.min(), 0 );
    EXPECT_EQ( VectorOperations::getVectorMin( v ), 0 );
 }
 
@@ -177,13 +183,16 @@ TYPED_TEST( VectorTest, absMax )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
    setNegativeLinearSequence( v );
 
    EXPECT_EQ( v.absMax(), size - 1 );
+   EXPECT_EQ( v_view.absMax(), size - 1 );
    EXPECT_EQ( VectorOperations::getVectorAbsMax( v ), size - 1 );
 }
 
@@ -191,13 +200,16 @@ TYPED_TEST( VectorTest, absMin )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
    setNegativeLinearSequence( v );
 
    EXPECT_EQ( v.absMin(), 0 );
+   EXPECT_EQ( v_view.absMin(), 0 );
    EXPECT_EQ( VectorOperations::getVectorAbsMin( v ), 0 );
 }
 
@@ -206,11 +218,13 @@ TYPED_TEST( VectorTest, lpNorm )
    using VectorType = typename TestFixture::VectorType;
    using RealType = typename VectorType::RealType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
    const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon();
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
    setConstantSequence( v, 1 );
 
    const RealType expectedL1norm = size;
@@ -219,6 +233,9 @@ TYPED_TEST( VectorTest, lpNorm )
    EXPECT_EQ( v.lpNorm( 1.0 ), expectedL1norm );
    EXPECT_EQ( v.lpNorm( 2.0 ), expectedL2norm );
    EXPECT_NEAR( v.lpNorm( 3.0 ), expectedL3norm, epsilon );
+   EXPECT_EQ( v_view.lpNorm( 1.0 ), expectedL1norm );
+   EXPECT_EQ( v_view.lpNorm( 2.0 ), expectedL2norm );
+   EXPECT_NEAR( v_view.lpNorm( 3.0 ), expectedL3norm, epsilon );
    EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 1.0 ), expectedL1norm );
    EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 2.0 ), expectedL2norm );
    EXPECT_NEAR( VectorOperations::getVectorLpNorm( v, 3.0 ), expectedL3norm, epsilon );
@@ -228,26 +245,32 @@ TYPED_TEST( VectorTest, sum )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    // this test expect an even size
    const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
 
    setConstantSequence( v, 1 );
    EXPECT_EQ( v.sum(), size );
+   EXPECT_EQ( v_view.sum(), size );
    EXPECT_EQ( VectorOperations::getVectorSum( v ), size );
 
    setLinearSequence( v );
    EXPECT_EQ( v.sum(), 0.5 * size * ( size - 1 ) );
+   EXPECT_EQ( v_view.sum(), 0.5 * size * ( size - 1 ) );
    EXPECT_EQ( VectorOperations::getVectorSum( v ), 0.5 * size * ( size - 1 ) );
 
    setNegativeLinearSequence( v );
    EXPECT_EQ( v.sum(), - 0.5 * size * ( size - 1 ) );
+   EXPECT_EQ( v_view.sum(), - 0.5 * size * ( size - 1 ) );
    EXPECT_EQ( VectorOperations::getVectorSum( v ), - 0.5 * size * ( size - 1 ) );
 
    setOscilatingSequence( v, 1.0 );
    EXPECT_EQ( v.sum(), 0 );
+   EXPECT_EQ( v_view.sum(), 0 );
    EXPECT_EQ( VectorOperations::getVectorSum( v ), 0 );
 }
 
@@ -255,15 +278,18 @@ TYPED_TEST( VectorTest, differenceMax )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );
    setLinearSequence( u );
    setConstantSequence( v, size / 2 );
 
    EXPECT_EQ( u.differenceMax( v ), size - 1 - size / 2 );
+   EXPECT_EQ( u_view.differenceMax( v_view ), size - 1 - size / 2 );
    EXPECT_EQ( VectorOperations::getVectorDifferenceMax( u, v ), size - 1 - size / 2 );
 }
 
@@ -271,17 +297,21 @@ TYPED_TEST( VectorTest, differenceMin )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );
    setLinearSequence( u );
    setConstantSequence( v, size / 2 );
 
    EXPECT_EQ( u.differenceMin( v ), - size / 2 );
+   EXPECT_EQ( u_view.differenceMin( v_view ), - size / 2 );
    EXPECT_EQ( VectorOperations::getVectorDifferenceMin( u, v ), - size / 2 );
    EXPECT_EQ( v.differenceMin( u ), size / 2 - size + 1 );
+   EXPECT_EQ( v_view.differenceMin( u_view ), size / 2 - size + 1 );
    EXPECT_EQ( VectorOperations::getVectorDifferenceMin( v, u ), size / 2 - size + 1 );
 }
 
@@ -289,16 +319,19 @@ TYPED_TEST( VectorTest, differenceAbsMax )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    // this test expects an odd size
    const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1;
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );
    setNegativeLinearSequence( u );
    setConstantSequence( v, - size / 2 );
 
    EXPECT_EQ( u.differenceAbsMax( v ), size - 1 - size / 2 );
+   EXPECT_EQ( u_view.differenceAbsMax( v_view ), size - 1 - size / 2 );
    EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMax( u, v ), size - 1 - size / 2 );
 }
 
@@ -306,17 +339,21 @@ TYPED_TEST( VectorTest, differenceAbsMin )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );
    setNegativeLinearSequence( u );
    setConstantSequence( v, - size / 2 );
 
    EXPECT_EQ( u.differenceAbsMin( v ), 0 );
+   EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 );
    EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( u, v ), 0 );
    EXPECT_EQ( v.differenceAbsMin( u ), 0 );
+   EXPECT_EQ( v_view.differenceAbsMin( u_view ), 0 );
    EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( v, u ), 0 );
 }
 
@@ -325,12 +362,14 @@ TYPED_TEST( VectorTest, differenceLpNorm )
    using VectorType = typename TestFixture::VectorType;
    using RealType = typename VectorType::RealType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
    const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon();
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );
    u.setValue( 3.0 );
    v.setValue( 1.0 );
 
@@ -340,6 +379,9 @@ TYPED_TEST( VectorTest, differenceLpNorm )
    EXPECT_EQ( u.differenceLpNorm( v, 1.0 ), expectedL1norm );
    EXPECT_EQ( u.differenceLpNorm( v, 2.0 ), expectedL2norm );
    EXPECT_NEAR( u.differenceLpNorm( v, 3.0 ), expectedL3norm, epsilon );
+   EXPECT_EQ( u_view.differenceLpNorm( v_view, 1.0 ), expectedL1norm );
+   EXPECT_EQ( u_view.differenceLpNorm( v_view, 2.0 ), expectedL2norm );
+   EXPECT_NEAR( u_view.differenceLpNorm( v_view, 3.0 ), expectedL3norm, epsilon );
    EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 1.0 ), expectedL1norm );
    EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 2.0 ), expectedL2norm );
    EXPECT_NEAR( VectorOperations::getVectorDifferenceLpNorm( u, v, 3.0 ), expectedL3norm, epsilon );
@@ -349,28 +391,34 @@ TYPED_TEST( VectorTest, differenceSum )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    // this test expect an even size
    const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE;
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );
    v.setValue( 1.0 );
 
    setConstantSequence( u, 2 );
    EXPECT_EQ( u.differenceSum( v ), size );
+   EXPECT_EQ( u_view.differenceSum( v_view ), size );
    EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), size );
 
    setLinearSequence( u );
    EXPECT_EQ( u.differenceSum( v ), 0.5 * size * ( size - 1 ) - size );
+   EXPECT_EQ( u_view.differenceSum( v_view ), 0.5 * size * ( size - 1 ) - size );
    EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), 0.5 * size * ( size - 1 ) - size );
 
    setNegativeLinearSequence( u );
    EXPECT_EQ( u.differenceSum( v ), - 0.5 * size * ( size - 1 ) - size );
+   EXPECT_EQ( u_view.differenceSum( v_view ), - 0.5 * size * ( size - 1 ) - size );
    EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - 0.5 * size * ( size - 1 ) - size );
 
    setOscilatingSequence( u, 1.0 );
    EXPECT_EQ( u.differenceSum( v ), - size );
+   EXPECT_EQ( u_view.differenceSum( v_view ), - size );
    EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - size );
 }
 
@@ -378,10 +426,12 @@ TYPED_TEST( VectorTest, scalarMultiplication )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType u;
    u.setSize( size );
+   ViewType u_view( u );
 
    typename VectorType::HostType expected;
    expected.setSize( size );
@@ -396,25 +446,36 @@ TYPED_TEST( VectorTest, scalarMultiplication )
    u.scalarMultiplication( 2.0 );
    EXPECT_EQ( u, expected );
 
+   setLinearSequence( u );
+   u_view.scalarMultiplication( 2.0 );
+   EXPECT_EQ( u, expected );
+
    setLinearSequence( u );
    u *= 2.0;
    EXPECT_EQ( u, expected );
+
+   setLinearSequence( u );
+   u_view *= 2.0;
+   EXPECT_EQ( u, expected );
 }
 
 TYPED_TEST( VectorTest, scalarProduct )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;  // view type matching the tested vector type
    // this test expects an odd size
    const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1;
 
    VectorType u, v;
    u.setSize( size );
    v.setSize( size );
+   ViewType u_view( u ), v_view( v );  // bound before the data is filled; must observe the values written below
    setOscilatingSequence( u, 1.0 );
    setConstantSequence( v, 1 );
 
    EXPECT_EQ( u.scalarProduct( v ), 1.0 );
+   EXPECT_EQ( u_view.scalarProduct( v_view ), 1.0 );  // same result is expected through the views
    EXPECT_EQ( VectorOperations::getScalarProduct( u, v ), 1.0 );
 }
 
@@ -422,11 +483,13 @@ TYPED_TEST( VectorTest, addVector )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType x, y;
    x.setSize( size );
    y.setSize( size );
+   ViewType x_view( x ), y_view( y );
 
    typename VectorType::HostType expected1, expected2;
    expected1.setSize( size );
@@ -445,18 +508,25 @@ TYPED_TEST( VectorTest, addVector )
    setLinearSequence( y );
    x.addVector( y, 3.0, 1.0 );
    EXPECT_EQ( x, expected2 );
+
+   setConstantSequence( x, 1 );
+   setLinearSequence( y );
+   x_view.addVector( y_view, 3.0, 1.0 );
+   EXPECT_EQ( x, expected2 );
 }
 
 TYPED_TEST( VectorTest, addVectors )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType x, y, z;
    x.setSize( size );
    y.setSize( size );
    z.setSize( size );
+   ViewType x_view( x ), y_view( y ), z_view( z );
 
    typename VectorType::HostType expected1, expected2;
    expected1.setSize( size );
@@ -477,17 +547,24 @@ TYPED_TEST( VectorTest, addVectors )
    setConstantSequence( z, 2 );
    x.addVectors( y, 3.0, z, 1.0, 2.0 );
    EXPECT_EQ( x, expected2 );
+
+   setConstantSequence( x, 1 );
+   setLinearSequence( y );
+   setConstantSequence( z, 2 );
+   x_view.addVectors( y_view, 3.0, z_view, 1.0, 2.0 );
+   EXPECT_EQ( x, expected2 );
 }
 
-// TODO: fix the CUDA implementations
 TYPED_TEST( VectorTest, prefixSum )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
 
    setConstantSequence( v, 1 );
    v.computePrefixSum();
@@ -503,17 +580,33 @@ TYPED_TEST( VectorTest, prefixSum )
    v.computePrefixSum();
    for( int i = 1; i < size; i++ )
       EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i );
+
+   setConstantSequence( v, 1 );
+   v_view.computePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), i + 1 );
+
+   v.setValue( 0 );
+   v_view.computePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), 0 );
+
+   setLinearSequence( v );
+   v_view.computePrefixSum();
+   for( int i = 1; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i );
 }
 
-// TODO: fix the CUDA implementations
 TYPED_TEST( VectorTest, exclusivePrefixSum )
 {
    using VectorType = typename TestFixture::VectorType;
    using VectorOperations = typename TestFixture::VectorOperations;
+   using ViewType = typename TestFixture::ViewType;
    const int size = VECTOR_TEST_SIZE;
 
    VectorType v;
    v.setSize( size );
+   ViewType v_view( v );
 
    setConstantSequence( v, 1 );
    v.computeExclusivePrefixSum();
@@ -529,6 +622,21 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
    v.computeExclusivePrefixSum();
    for( int i = 1; i < size; i++ )
       EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 );
+
+   setConstantSequence( v, 1 );
+   v_view.computeExclusivePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), i );
+
+   v.setValue( 0 );
+   v_view.computeExclusivePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), 0 );
+
+   setLinearSequence( v );
+   v_view.computeExclusivePrefixSum();
+   for( int i = 1; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 );
 }
 
 // TODO: test prefix sum with custom begin and end parameters
@@ -536,9 +644,11 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
 TEST( VectorSpecialCasesTest, sumOfBoolVector )
 {
    using VectorType = Containers::Vector< bool, Devices::Host >;
+   using ViewType = VectorView< bool, Devices::Host >;
    const float epsilon = 64 * std::numeric_limits< float >::epsilon();
 
    VectorType v( 512 ), w( 512 );
+   ViewType v_view( v ), w_view( w );
    v.setValue( true );
    w.setValue( false );
 
@@ -559,6 +669,90 @@ TEST( VectorSpecialCasesTest, sumOfBoolVector )
    EXPECT_EQ( diff_l1norm, 512 );
    EXPECT_NEAR( diff_l2norm, std::sqrt( 512 ), epsilon );
    EXPECT_NEAR( diff_l3norm, std::cbrt( 512 ), epsilon );
+
+   // test views
+   const int sum_view = v_view.sum< int >();
+   const int l1norm_view = v_view.lpNorm< int >( 1.0 );
+   const float l2norm_view = v_view.lpNorm< float >( 2.0 );
+   const float l3norm_view = v_view.lpNorm< float >( 3.0 );
+   EXPECT_EQ( sum_view, 512 );
+   EXPECT_EQ( l1norm_view, 512 );
+   EXPECT_NEAR( l2norm_view, std::sqrt( 512 ), epsilon );
+   EXPECT_NEAR( l3norm_view, std::cbrt( 512 ), epsilon );
+
+   const int diff_sum_view = v_view.differenceSum< int >( w_view );
+   const int diff_l1norm_view = v_view.differenceLpNorm< int >( w_view, 1.0 );
+   const float diff_l2norm_view = v_view.differenceLpNorm< float >( w_view, 2.0 );
+   const float diff_l3norm_view = v_view.differenceLpNorm< float >( w_view, 3.0 );
+   EXPECT_EQ( diff_sum_view, 512 );
+   EXPECT_EQ( diff_l1norm_view, 512 );
+   EXPECT_NEAR( diff_l2norm_view, std::sqrt( 512 ), epsilon );
+   EXPECT_NEAR( diff_l3norm_view, std::cbrt( 512 ), epsilon );
+}
+
+TEST( VectorSpecialCasesTest, assignmentThroughView )
+{
+   using VectorType = Containers::Vector< int, Devices::Host >;
+   using ViewType = VectorView< int, Devices::Host >;
+   // two separate vectors of equal size, each observed through its own view
+   VectorType u( 100 ), v( 100 );
+   ViewType u_view( u ), v_view( v );
+   // view = view: values must be copied while both views keep pointing at their own vector's data
+   u.setValue( 42 );
+   v.setValue( 0 );
+   v_view = u_view;
+   EXPECT_EQ( u_view.getData(), u.getData() );
+   EXPECT_EQ( v_view.getData(), v.getData() );
+   for( int i = 0; i < 100; i++ )
+      EXPECT_EQ( v_view[ i ], 42 );
+   // view = vector: same expectations as for the view-to-view assignment above
+   u.setValue( 42 );
+   v.setValue( 0 );
+   v_view = u;
+   EXPECT_EQ( u_view.getData(), u.getData() );
+   EXPECT_EQ( v_view.getData(), v.getData() );
+   for( int i = 0; i < 100; i++ )
+      EXPECT_EQ( v_view[ i ], 42 );
+}
+
+TEST( VectorSpecialCasesTest, operationsOnConstView )
+{
+   using VectorType = Containers::Vector< int, Devices::Host >;
+   using ViewType = VectorView< const int, Devices::Host >;
+   // views with a const value type are bound to ordinary (mutable) vectors
+   VectorType u( 100 ), v( 100 );
+   ViewType u_view( u ), v_view( v );
+   // the data is written through the vectors; every element of u and v equals 1
+   u.setValue( 1 );
+   v.setValue( 1 );
+   // each operation below is called on the const-valued views; expected values follow from 100 ones
+   EXPECT_EQ( u_view.max(), 1 );
+   EXPECT_EQ( u_view.min(), 1 );
+   EXPECT_EQ( u_view.absMax(), 1 );
+   EXPECT_EQ( u_view.absMin(), 1 );
+   EXPECT_EQ( u_view.lpNorm( 1 ), 100 );
+   EXPECT_EQ( u_view.differenceMax( v_view ), 0 );
+   EXPECT_EQ( u_view.differenceMin( v_view ), 0 );
+   EXPECT_EQ( u_view.differenceAbsMax( v_view ), 0 );
+   EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 );
+   EXPECT_EQ( u_view.differenceLpNorm( v_view, 1 ), 0 );
+   EXPECT_EQ( u_view.differenceSum( v_view ), 0 );
+   EXPECT_EQ( u_view.scalarProduct( v_view ), 100 );
+}
+
+TEST( VectorSpecialCasesTest, initializationOfVectorViewByArrayView )
+{
+   using ArrayType = Containers::Array< int, Devices::Host >;
+   using VectorViewType = VectorView< const int, Devices::Host >;
+   using ArrayViewType = ArrayView< int, Devices::Host >;
+   // zero-filled array observed through a mutable array view
+   ArrayType a( 100 );
+   a.setValue( 0 );
+   ArrayViewType a_view( a );
+   // a const-valued vector view built from the array view must refer to the same data
+   VectorViewType v_view( a_view );
+   EXPECT_EQ( v_view.getData(), a_view.getData() );
+   EXPECT_EQ( v_view.sum(), 0 );
 }
 
 #endif // HAVE_GTEST
diff --git a/src/UnitTests/Functions/CMakeLists.txt b/src/UnitTests/Functions/CMakeLists.txt
index 9641a5b329ea3069f4edc6e0c0664eedb4aa94e2..cf9466de4525130a3ba2c6c0f9dd3c3bd988af8c 100644
--- a/src/UnitTests/Functions/CMakeLists.txt
+++ b/src/UnitTests/Functions/CMakeLists.txt
@@ -1,13 +1,20 @@
 IF( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( MeshFunctionTest MeshFunctionTest.h MeshFunctionTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_COMPILE_OPTIONS( MeshFunctionTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MeshFunctionTest ${GTEST_BOTH_LIBRARIES} tnl )
+
    CUDA_ADD_EXECUTABLE( BoundaryMeshFunctionTest BoundaryMeshFunctionTest.h BoundaryMeshFunctionTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( BoundaryMeshFunctionTest ${GTEST_BOTH_LIBRARIES}
-                                                           tnl )
+   TARGET_COMPILE_OPTIONS( BoundaryMeshFunctionTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BoundaryMeshFunctionTest ${GTEST_BOTH_LIBRARIES} tnl )
 ELSE(  BUILD_CUDA )
+   ADD_EXECUTABLE( MeshFunctionTest MeshFunctionTest.h MeshFunctionTest.cpp )
+   TARGET_COMPILE_OPTIONS( MeshFunctionTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MeshFunctionTest ${GTEST_BOTH_LIBRARIES} tnl )
+
    ADD_EXECUTABLE( BoundaryMeshFunctionTest BoundaryMeshFunctionTest.h BoundaryMeshFunctionTest.cpp )
    TARGET_COMPILE_OPTIONS( BoundaryMeshFunctionTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( BoundaryMeshFunctionTest ${GTEST_BOTH_LIBRARIES}
-                                                           tnl )
+   TARGET_LINK_LIBRARIES( BoundaryMeshFunctionTest ${GTEST_BOTH_LIBRARIES} tnl )
 ENDIF( BUILD_CUDA )
 
-
+ADD_TEST( MeshFunctionTest ${EXECUTABLE_OUTPUT_PATH}/MeshFunctionTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BoundaryMeshFunctionTest ${EXECUTABLE_OUTPUT_PATH}/BoundaryMeshFunctionTest${CMAKE_EXECUTABLE_SUFFIX} )
\ No newline at end of file
diff --git a/src/UnitTests/Functions/MeshFunctionTest.cpp b/src/UnitTests/Functions/MeshFunctionTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..85943c29fe36235a6a3dd57b9bd1fdbe133e7b69
--- /dev/null
+++ b/src/UnitTests/Functions/MeshFunctionTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          MeshFunctionTest.cpp  -  description
+                             -------------------
+    begin                : Sep 11, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "MeshFunctionTest.h"
diff --git a/src/UnitTests/Functions/MeshFunctionTest.cu b/src/UnitTests/Functions/MeshFunctionTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9f8affcd9ce70c9d94a5cde4a0ef7a008c90e412
--- /dev/null
+++ b/src/UnitTests/Functions/MeshFunctionTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          MeshFunctionTest.cu  -  description
+                             -------------------
+    begin                : Sep 11, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "MeshFunctionTest.h"
diff --git a/src/UnitTests/Functions/MeshFunctionTest.h b/src/UnitTests/Functions/MeshFunctionTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..b60daf091a291579f3c4e50318805ed2dc699163
--- /dev/null
+++ b/src/UnitTests/Functions/MeshFunctionTest.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+                          MeshFunctionTest.h  -  description
+                             -------------------
+    begin                : Sep 11, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include "../GtestMissingError.h"
+
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+#include <sstream>
+#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Meshes/Grid.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+TEST( MeshFunctionTest, BasicConstructor )
+{
+   using Grid = TNL::Meshes::Grid< 2 >;
+   TNL::Functions::MeshFunction< Grid > meshFunction;  // smoke test: default construction only, nothing is asserted
+}
+
+TEST( MeshFunctionTest, OstreamOperatorTest )
+{
+   using GridType = TNL::Meshes::Grid< 2 >;
+   using GridPointer = TNL::Pointers::SharedPointer< GridType >;
+   using CoordinatesType = typename GridType::CoordinatesType;
+   using MeshFunctionType = TNL::Functions::MeshFunction< GridType >;
+   GridPointer grid;
+   grid->setDimensions( CoordinatesType( 3, 3 ) );
+   MeshFunctionType meshFunction( grid );
+   meshFunction.getData().setValue( 1.0 );
+   // operator<< is expected to print the stored values as one bracketed, comma-separated list
+   const char* str = "[ 1, 1, 1, 1, 1, 1, 1, 1, 1 ]";
+   std::stringstream string_stream1, string_stream2( str );
+   string_stream1 << meshFunction;
+   EXPECT_EQ( string_stream1.str(), string_stream2.str() );
+}
+
+
+#endif
+
+
+int main( int argc, char* argv[] )
+{
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );  // hands the command-line arguments to GoogleTest
+   return RUN_ALL_TESTS();  // the test result becomes the process exit code
+#else
+   throw GtestMissingError();  // built without GTest: throw instead of returning a success code
+#endif // HAVE_GTEST
+}
diff --git a/src/UnitTests/Meshes/CMakeLists.txt b/src/UnitTests/Meshes/CMakeLists.txt
index 9795b46589562e2e2502ceab1db6293e5f4e5918..e35856258b5f1adab69a84e27fc429ebe1a1c020 100644
--- a/src/UnitTests/Meshes/CMakeLists.txt
+++ b/src/UnitTests/Meshes/CMakeLists.txt
@@ -59,19 +59,22 @@ ADD_TEST( MeshEntityTest ${EXECUTABLE_OUTPUT_PATH}/MeshEntityTest${CMAKE_EXECUTA
 #   SET( VTK_COMMON_LIBRARIES vtkCommonCore ; vtkIOLegacy )
 #endif( VTK_FOUND )
 
-# Mesh cannot be compiled by nvcc < 9 due to bugs in the compiler
-if( ${BUILD_CUDA} AND ( ${CUDA_VERSION_MAJOR} GREATER 9 OR ${CUDA_VERSION_MAJOR} EQUAL 9 ) )
-   CUDA_ADD_EXECUTABLE( MeshReaderTest MeshReaderTest.cu
-                        OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MeshReaderTest
-                           ${GTEST_BOTH_LIBRARIES}
-                           ${VTK_COMMON_LIBRARIES}
-                           tnl )
-else()
-   ADD_EXECUTABLE( MeshReaderTest MeshReaderTest.cpp )
-   TARGET_COMPILE_OPTIONS( MeshReaderTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MeshReaderTest
-                           ${GTEST_BOTH_LIBRARIES}
-                           ${VTK_COMMON_LIBRARIES}
-                           tnl )
-endif()
+## MeshReaderTest is not a unit test so we disable it, because it takes
+## a long time to compile.
+##
+## Mesh cannot be compiled by nvcc < 9 due to bugs in the compiler
+#if( ${BUILD_CUDA} AND ${CUDA_VERSION_MAJOR} GREATER_EQUAL 9 )
+#   CUDA_ADD_EXECUTABLE( MeshReaderTest MeshReaderTest.cu
+#                        OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MeshReaderTest
+#                           ${GTEST_BOTH_LIBRARIES}
+#                           ${VTK_COMMON_LIBRARIES}
+#                           tnl )
+#else()
+#   ADD_EXECUTABLE( MeshReaderTest MeshReaderTest.cpp )
+#   TARGET_COMPILE_OPTIONS( MeshReaderTest PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MeshReaderTest
+#                           ${GTEST_BOTH_LIBRARIES}
+#                           ${VTK_COMMON_LIBRARIES}
+#                           tnl )
+#endif()
diff --git a/src/UnitTests/Meshes/DistributedMeshes/CMakeLists.txt b/src/UnitTests/Meshes/DistributedMeshes/CMakeLists.txt
index 71cca245d18c157a98d2533c42f32b80ff16190a..ad4127dbd724d0fba9cc03bc21e065e3ae91c179 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/CMakeLists.txt
+++ b/src/UnitTests/Meshes/DistributedMeshes/CMakeLists.txt
@@ -20,7 +20,7 @@ ADD_TEST( NAME DirectionsTest COMMAND ${EXECUTABLE_OUTPUT_PATH}/DirectionsTest${
 ADD_TEST( NAME CopyEntitesTest COMMAND ${EXECUTABLE_OUTPUT_PATH}/CopyEntitesTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( NAME CutMeshFunctionTest COMMAND ${EXECUTABLE_OUTPUT_PATH}/CutMeshFunctionTest${CMAKE_EXECUTABLE_SUFFIX} )
 
-if( ${CXX_COMPILER_NAME} STREQUAL "mpic++" )
+if( BUILD_MPI )
 ADD_EXECUTABLE( DistributedGridTest_1D DistributedGridTest_1D.cpp )
    TARGET_COMPILE_OPTIONS( DistributedGridTest_1D PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DistributedGridTest_1D
diff --git a/src/UnitTests/Meshes/DistributedMeshes/CopyEntitiesTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/CopyEntitiesTest.cpp
index 7c4c944e73326c0cc840fc84f44ba92f43620596..383f8328b3e1b11475cc19a955b180cea646246f 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/CopyEntitiesTest.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/CopyEntitiesTest.cpp
@@ -164,7 +164,7 @@ class TestCopyEntities
 
 			PointType origin;
 			PointType proportions;
-			SharedPointer<MeshType> gridptr;
+			Pointers::SharedPointer<MeshType> gridptr;
 
 			origin.setValue(-0.5);
 			proportions.setValue(10);
@@ -182,7 +182,7 @@ class TestCopyEntities
 
 			PointType originOut;
 			PointType proportionsOut;
-			SharedPointer<MeshType> gridOutPtr;
+			Pointers::SharedPointer<MeshType> gridOutPtr;
 
 			originOut.setValue(0.5);	
 			proportionsOut.setValue(8);		
diff --git a/src/UnitTests/Meshes/DistributedMeshes/CutDistributedGridTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/CutDistributedGridTest.cpp
index bf7755e5fc3c6b1f1ced0b96bdbc0cfff7a47165..587ec807ec0ad01515832f777cd444c5c9ac386a 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/CutDistributedGridTest.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/CutDistributedGridTest.cpp
@@ -4,6 +4,7 @@
 #ifdef HAVE_MPI  
 
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
 
@@ -404,7 +405,7 @@ TEST(NoMPI, NoTest)
   };
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
@@ -417,14 +418,9 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv);
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
     #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
-    #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp
index a0df0810b7ce503fe15eadeb7cf1fcfae789aa37..4907f1d269cfc3d7f5d205405546675f1b8d58fe 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp
@@ -6,6 +6,7 @@
 #include <TNL/Devices/Host.h> 
 #include <TNL/Functions/CutMeshFunction.h>
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedGridIO.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
 
@@ -54,24 +55,24 @@ TEST(CutDistributedMeshFunction, 2D_Data)
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    meshFunctionptr->template synchronize<CommunicatorType>();
  
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -133,24 +134,24 @@ TEST(CutDistributedMeshFunction, 3D_1_Data)
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    meshFunctionptr->template synchronize<CommunicatorType>();
 
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -212,24 +213,24 @@ TEST(CutDistributedMeshFunction, 3D_2_Data)
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    meshFunctionptr->template synchronize<CommunicatorType>();
 
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -297,22 +298,22 @@ TEST(CutDistributedMeshFunction, 2D_Synchronization)
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -380,22 +381,22 @@ TEST(CutDistributedMeshFunction, 3D_1_Synchronization)
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -467,22 +468,22 @@ TEST(CutDistributedMeshFunction, 3D_2_Synchronization)
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -552,22 +553,22 @@ TEST(CutDistributedMeshFunction, 3D_2_Save)
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshType> originalGrid;
    distributedGrid.setupGrid(*originalGrid);
 
    DofType dof(originalGrid->template getEntitiesCount< Cell >());
    dof.setValue(0); 
 
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
 
    //Prepare Mesh Function parts for Cut 
    CutDistributedMeshType cutDistributedGrid;
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    cutGrid->setDistMesh(&cutDistributedGrid);
    DofType cutDof(0);
    bool inCut=CutMeshFunction<CommunicatorType, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
@@ -600,7 +601,7 @@ TEST(CutDistributedMeshFunction, 3D_2_Save)
 
    if(CommunicatorType::GetRank(CommunicatorType::AllGroup)==0)
    {
-       SharedPointer<CutMeshType> globalCutGrid;
+       Pointers::SharedPointer<CutMeshType> globalCutGrid;
        MeshFunction<CutMeshType> loadMeshFunctionptr;
 
        globalCutGrid->setDimensions(typename CutMeshType::CoordinatesType(10));
@@ -684,7 +685,7 @@ TEST(NoMPI, NoTest)
   };
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
@@ -697,14 +698,9 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv);
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
     #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
-    #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/CutMeshFunctionTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/CutMeshFunctionTest.cpp
index ba3822db3351f19cb81b523521eeafb02ff8fa52..ce78b85680c69bba791d205661d918d1498ef3a0 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/CutMeshFunctionTest.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/CutMeshFunctionTest.cpp
@@ -30,8 +30,8 @@ TEST(CutMeshFunction, 2D)
   
 
    //Original MeshFunciton --filed with linear function
-   SharedPointer<MeshType> originalGrid;
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
  
    PointType origin;
    origin.setValue(-0.5);
@@ -46,11 +46,11 @@ TEST(CutMeshFunction, 2D)
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
  
    //Prepare Mesh Function parts for Cut 
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    DofType cutDof(0);
    bool inCut=CutMeshFunction<NoDistrCommunicator,MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
             *meshFunctionptr,*cutGrid, cutDof, 
@@ -93,8 +93,8 @@ TEST(CutMeshFunction, 3D_1)
   
 
    //Original MeshFunciton --filed with linear function
-   SharedPointer<MeshType> originalGrid;
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
  
    PointType origin;
    origin.setValue(-0.5);
@@ -109,11 +109,11 @@ TEST(CutMeshFunction, 3D_1)
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
  
    //Prepare Mesh Function parts for Cut 
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    DofType cutDof(0);
    bool inCut=CutMeshFunction<NoDistrCommunicator,MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
             *meshFunctionptr,*cutGrid, cutDof, 
@@ -156,8 +156,8 @@ TEST(CutMeshFunction, 3D_2)
   
 
    //Original MeshFunciton --filed with linear function
-   SharedPointer<MeshType> originalGrid;
-   SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
+   Pointers::SharedPointer<MeshType> originalGrid;
+   Pointers::SharedPointer<MeshFunction<MeshType>> meshFunctionptr;
  
    PointType origin;
    origin.setValue(-0.5);
@@ -172,11 +172,11 @@ TEST(CutMeshFunction, 3D_2)
    meshFunctionptr->bind(originalGrid,dof);
 
    MeshFunctionEvaluator< MeshFunction<MeshType>, LinearFunctionType > linearFunctionEvaluator;
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
  
    //Prepare Mesh Function parts for Cut 
-   SharedPointer<CutMeshType> cutGrid;
+   Pointers::SharedPointer<CutMeshType> cutGrid;
    DofType cutDof(0);
    bool inCut=CutMeshFunction<NoDistrCommunicator, MeshFunction<MeshType>,CutMeshType,DofType>::Cut(
             *meshFunctionptr,*cutGrid, cutDof, 
@@ -213,15 +213,13 @@ TEST(CutMeshFunction, 3D_2)
 
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
-       int result= RUN_ALL_TESTS();
-       return result;
+   return RUN_ALL_TESTS();  // the gtest result is returned directly as the exit status
 #else
-   
    throw GtestMissingError();
 #endif
 }
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp
index 52ed831d0baced3dbac35f36de9ac2719dd8363f..e8625bc3d6ab57d3ae01b23d99e68dc34792d7a2 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp
@@ -121,7 +121,7 @@ TEST(XYZ, 3D )
 
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h
index 193ed0b23aa049b6d6e5d6fd78be5730a9f8759e..9ac299621b15b85ff2bd31126af19c647a9158e2 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h
@@ -206,7 +206,7 @@ class TestDistributedGridIO
 
     static void TestSave()
     {
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
         
         ParameterProvider<dim,Device> parameters;
@@ -234,8 +234,8 @@ class TestDistributedGridIO
 
         //std::cout << distributedGrid.printProcessDistr() <<std::endl;
 
-        SharedPointer<MeshType> gridptr;
-        SharedPointer<MeshFunctionType> meshFunctionptr;
+        Pointers::SharedPointer<MeshType> gridptr;
+        Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
         distributedGrid.setupGrid(*gridptr);
        
         DofType dof(gridptr->template getEntitiesCount< Cell >());
@@ -252,23 +252,23 @@ class TestDistributedGridIO
         PointType localOrigin=parameters.getOrigin(CommunicatorType::GetRank(CommunicatorType::AllGroup));        
         PointType localProportions=parameters.getProportions(CommunicatorType::GetRank(CommunicatorType::AllGroup));;
             
-        SharedPointer<MeshType>  localGridptr;
+        Pointers::SharedPointer<MeshType>  localGridptr;
         localGridptr->setDimensions(localProportions);
         localGridptr->setDomain(localOrigin,localProportions);
 
         DofType localDof(localGridptr->template getEntitiesCount< Cell >());
 
-        SharedPointer<MeshFunctionType> localMeshFunctionptr;
+        Pointers::SharedPointer<MeshFunctionType> localMeshFunctionptr;
         localMeshFunctionptr->bind(localGridptr,localDof);
         linearFunctionEvaluator.evaluateAllEntities(localMeshFunctionptr , linearFunctionPtr);
 
         //load other meshfunction on same localgrid from created file
-        SharedPointer<MeshType>  loadGridptr;
+        Pointers::SharedPointer<MeshType>  loadGridptr;
         loadGridptr->setDimensions(localProportions);
         loadGridptr->setDomain(localOrigin,localProportions);
 
         DofType loadDof(localGridptr->template getEntitiesCount< Cell >());
-        SharedPointer<MeshFunctionType> loadMeshFunctionptr;
+        Pointers::SharedPointer<MeshFunctionType> loadMeshFunctionptr;
         loadMeshFunctionptr->bind(loadGridptr,loadDof);
 
         loadDof.setValue(-1);
@@ -286,7 +286,7 @@ class TestDistributedGridIO
     
     static void TestLoad()
     {
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
         
         ParameterProvider<dim,Device> parameters;
@@ -315,13 +315,13 @@ class TestDistributedGridIO
         PointType localOrigin=parameters.getOrigin(CommunicatorType::GetRank(CommunicatorType::AllGroup));        
         PointType localProportions=parameters.getProportions(CommunicatorType::GetRank(CommunicatorType::AllGroup));;
             
-        SharedPointer<MeshType> localGridptr;
+        Pointers::SharedPointer<MeshType> localGridptr;
         localGridptr->setDimensions(localProportions);
         localGridptr->setDomain(localOrigin,localProportions);
 
         DofType localDof(localGridptr->template getEntitiesCount< Cell >());
 
-        SharedPointer<MeshFunctionType> localMeshFunctionptr;
+        Pointers::SharedPointer<MeshFunctionType> localMeshFunctionptr;
         localMeshFunctionptr->bind(localGridptr,localDof);
         linearFunctionEvaluator.evaluateAllEntities(localMeshFunctionptr , linearFunctionPtr);
 
@@ -335,8 +335,8 @@ class TestDistributedGridIO
 
 
         //Crete "distributedgrid driven" grid filed by load
-        SharedPointer<MeshType> loadGridptr;
-        SharedPointer<MeshFunctionType> loadMeshFunctionptr;
+        Pointers::SharedPointer<MeshType> loadGridptr;
+        Pointers::SharedPointer<MeshFunctionType> loadMeshFunctionptr;
         distributedGrid.setupGrid(*loadGridptr);
         
         DofType loadDof(loadGridptr->template getEntitiesCount< Cell >());
@@ -349,8 +349,8 @@ class TestDistributedGridIO
 
 
         //Crete "distributedgrid driven" grid filed by evaluated linear function
-        SharedPointer<MeshType> gridptr;
-        SharedPointer<MeshFunctionType> meshFunctionptr;
+        Pointers::SharedPointer<MeshType> gridptr;
+        Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
         distributedGrid.setupGrid(*gridptr);
         
         DofType dof(gridptr->template getEntitiesCount< Cell >());
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTestBase.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTestBase.h
index d7774436c5da27645b62fd5f81f5954a154f9c28..537bcd9239ddde38785b4c462d4bf9d71c0537c7 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTestBase.h
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTestBase.h
@@ -12,6 +12,7 @@
 #ifdef HAVE_MPI
 
 #include "DistributedGridIOTest.h"
+#include <TNL/Communicators/ScopedInitializer.h>
 
 TEST( DistributedGridIO, Save_1D )
 {
@@ -134,16 +135,11 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv );
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
        CommunicatorType::setRedirection( false );
        CommunicatorType::setupRedirection();
     #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
-    #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h
index de4e4d48f4b5f9ac88e738ef78496519197b381d..a68fe628e69bcf434dabe68953180dfa6a4a6b9c 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h
@@ -48,7 +48,7 @@ class TestDistributedGridMPIIO{
 
     static void TestSave()
     {
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
         
         //save distributed meshfunction into file
@@ -58,7 +58,7 @@ class TestDistributedGridMPIIO{
         PointType globalProportions;
         globalProportions.setValue(50);
 
-        SharedPointer<MeshType> globalGrid;
+        Pointers::SharedPointer<MeshType> globalGrid;
         globalGrid->setDimensions(globalProportions);
         globalGrid->setDomain(globalOrigin,globalProportions);
         
@@ -70,8 +70,8 @@ class TestDistributedGridMPIIO{
 
         ///std::cout << distributedGrid.printProcessDistr() <<std::endl;
 
-        SharedPointer<MeshType> gridptr;
-        SharedPointer<MeshFunctionType> meshFunctionptr;
+        Pointers::SharedPointer<MeshType> gridptr;
+        Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
         distributedGrid.setupGrid(*gridptr);
        
         DofType dof(gridptr->template getEntitiesCount< Cell >());
@@ -88,13 +88,13 @@ class TestDistributedGridMPIIO{
        {
             DofType globalEvaluatedDof(globalGrid->template getEntitiesCount< Cell >());
 
-            SharedPointer<MeshFunctionType> globalEvaluatedMeshFunctionptr;
+            Pointers::SharedPointer<MeshFunctionType> globalEvaluatedMeshFunctionptr;
             globalEvaluatedMeshFunctionptr->bind(globalGrid,globalEvaluatedDof);
             linearFunctionEvaluator.evaluateAllEntities(globalEvaluatedMeshFunctionptr , linearFunctionPtr);
 
 
             DofType loadDof(globalGrid->template getEntitiesCount< Cell >());
-            SharedPointer<MeshFunctionType> loadMeshFunctionptr;
+            Pointers::SharedPointer<MeshFunctionType> loadMeshFunctionptr;
             loadMeshFunctionptr->bind(globalGrid,loadDof);
 
             loadDof.setValue(-1);
@@ -113,7 +113,7 @@ class TestDistributedGridMPIIO{
     
     static void TestLoad()
     {
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
 
         //Crete distributed grid            
@@ -123,7 +123,7 @@ class TestDistributedGridMPIIO{
         PointType globalProportions;
         globalProportions.setValue(50);
 
-        SharedPointer<MeshType> globalGrid;
+        Pointers::SharedPointer<MeshType> globalGrid;
         globalGrid->setDimensions(globalProportions);
         globalGrid->setDomain(globalOrigin,globalProportions);
 
@@ -142,7 +142,7 @@ class TestDistributedGridMPIIO{
         {   
             DofType saveDof(globalGrid->template getEntitiesCount< Cell >());
 
-            SharedPointer<MeshFunctionType> saveMeshFunctionptr;
+            Pointers::SharedPointer<MeshFunctionType> saveMeshFunctionptr;
             saveMeshFunctionptr->bind(globalGrid,saveDof);
             linearFunctionEvaluator.evaluateAllEntities(saveMeshFunctionptr , linearFunctionPtr);
       
@@ -152,8 +152,8 @@ class TestDistributedGridMPIIO{
             file.close();
         }
 
-        SharedPointer<MeshType> loadGridptr;
-        SharedPointer<MeshFunctionType> loadMeshFunctionptr;
+        Pointers::SharedPointer<MeshType> loadGridptr;
+        Pointers::SharedPointer<MeshFunctionType> loadMeshFunctionptr;
         distributedGrid.setupGrid(*loadGridptr);
         
         DofType loadDof(loadGridptr->template getEntitiesCount< Cell >());
@@ -163,8 +163,8 @@ class TestDistributedGridMPIIO{
         DistributedGridIO<MeshFunctionType,MpiIO> ::load(FileName, *loadMeshFunctionptr );
         loadMeshFunctionptr->template synchronize<CommunicatorType>(); //need synchronization for overlaps to be filled corectly in loadDof
 
-        SharedPointer<MeshType> evalGridPtr;
-        SharedPointer<MeshFunctionType> evalMeshFunctionptr;
+        Pointers::SharedPointer<MeshType> evalGridPtr;
+        Pointers::SharedPointer<MeshFunctionType> evalMeshFunctionptr;
         distributedGrid.setupGrid(*evalGridPtr);
         
         DofType evalDof(evalGridPtr->template getEntitiesCount< Cell >());
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTestBase.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTestBase.h
index aaf5073ec86aa4961d6a40b7528c7b4ac0b73772..4e3603a7a40c36cc04d247f52148f029868dcbdf 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTestBase.h
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTestBase.h
@@ -10,6 +10,7 @@
 #ifdef HAVE_MPI
 
 #include "DistributedGridIO_MPIIOTest.h"
+#include <TNL/Communicators/ScopedInitializer.h>
 
 TEST( DistributedGridMPIIO, Save_1D )
 {
@@ -131,16 +132,11 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv );
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
        CommunicatorType::setRedirection( false );
        CommunicatorType::setupRedirection();
     #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
-    #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_1D.cpp b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_1D.cpp
index 3b72394fc8c645cc2dc5368e71ad08df983ef74a..251b9f553a4fc82b7d1bb5ef768991a519aec047 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_1D.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_1D.cpp
@@ -13,6 +13,7 @@
 #ifdef HAVE_MPI    
 
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
@@ -79,17 +80,19 @@ void check_Inner_1D(int rank, int nproc, const DofType& dof, typename DofType::R
 };
 
 /*
- * Light check of 1D distriover grid and its synchronization. 
- * Number of process is not limitated.
- * Overlap is limitated to 1
+ * Light check of a 1D distributed grid and its synchronization.
+ * The number of processes is not limited.
+ * The overlap is limited to 1.
  * Only double is tested as dof Real type -- it may be changed, extend test
  * Global size is hardcoded as 10 -- it can be changed, extend test
  */
 
 typedef MpiCommunicator CommunicatorType;
 typedef Grid<1,double,Host,int> GridType;
-typedef MeshFunction<GridType> MeshFunctionType;
-typedef Vector<double,Host,int> DofType;
+typedef MeshFunction< GridType > MeshFunctionType;
+typedef MeshFunction< GridType, GridType::getMeshDimension(), bool > MaskType;
+typedef Vector< double,Host,int> DofType;
+typedef Vector< bool, Host, int > MaskDofType;
 typedef typename GridType::Cell Cell;
 typedef typename GridType::IndexType IndexType; 
 typedef typename GridType::PointType PointType; 
@@ -101,15 +104,17 @@ class DistributedGridTest_1D : public ::testing::Test
 
       DistributedMesh< GridType > *distributedGrid;
       DofType dof;
+      MaskDofType maskDofs;
 
-      SharedPointer< GridType > gridptr;
-      SharedPointer< MeshFunctionType > meshFunctionPtr;
+      Pointers::SharedPointer< GridType > gridptr;
+      Pointers::SharedPointer< MeshFunctionType > meshFunctionPtr;
+      Pointers::SharedPointer< MaskType > maskPointer;
 
       MeshFunctionEvaluator< MeshFunctionType, ConstFunction< double, 1 > > constFunctionEvaluator;
-      SharedPointer< ConstFunction< double, 1 >, Host > constFunctionPtr;
+      Pointers::SharedPointer< ConstFunction< double, 1 >, Host > constFunctionPtr;
 
       MeshFunctionEvaluator< MeshFunctionType, LinearFunction< double, 1 > > linearFunctionEvaluator;
-      SharedPointer< LinearFunction< double, 1 >, Host > linearFunctionPtr;
+      Pointers::SharedPointer< LinearFunction< double, 1 >, Host > linearFunctionPtr;
 
       int rank;
       int nproc;
@@ -218,9 +223,9 @@ TEST_F(DistributedGridTest_1D, SynchronizerNeighborsTest )
 }
 
 
-TEST_F(DistributedGridTest_1D, LinearFunctionTest )
+TEST_F(DistributedGridTest_1D, EvaluateLinearFunction )
 {
-   //fill meshfunction with linear function (physical center of cell corresponds with its coordinates in grid) 
+   // fill the mesh function with a linear function (the physical center of each cell corresponds to its coordinates in the grid)
    setDof_1D(dof,-1);
    linearFunctionEvaluator.evaluateAllEntities(meshFunctionPtr, linearFunctionPtr);
    meshFunctionPtr->template synchronize<CommunicatorType>();
@@ -235,7 +240,7 @@ TEST_F(DistributedGridTest_1D, LinearFunctionTest )
 }
 
 
-TEST_F(DistributedGridTest_1D, SynchronizePeriodicNeighborsTest )
+TEST_F(DistributedGridTest_1D, SynchronizePeriodicNeighborsWithoutMask )
 {
    // Setup periodic boundaries
    // TODO: I do not know how to do it better with GTEST
@@ -245,18 +250,98 @@ TEST_F(DistributedGridTest_1D, SynchronizePeriodicNeighborsTest )
    distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
    distributedGrid->setupGrid(*gridptr);
    dof.setSize( gridptr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridptr->template getEntitiesCount< Cell >() );
    meshFunctionPtr->bind( gridptr, dof );
-
+   maskPointer->bind( gridptr, maskDofs );
+   
    setDof_1D( dof, -rank-1 );
-   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   maskDofs.setValue( true );
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr, constFunctionPtr );
    meshFunctionPtr->template synchronize<CommunicatorType>( true );
+   if( rank == 0 )
+      EXPECT_EQ( dof[ 1 ], -nproc ) << "Left Overlap was filled by wrong process.";
+   if( rank == nproc-1 )
+      EXPECT_EQ( dof[ dof.getSize() - 2 ], -1 )<< "Right Overlap was filled by wrong process.";
+}
 
+TEST_F(DistributedGridTest_1D, SynchronizePeriodicNeighborsWithActiveMask )
+{
+   // Set up periodic boundaries: recompute the overlaps, rebuild the local grid
+   // and rebind the mesh function and the mask. TODO: find a cleaner way with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridptr);
+   dof.setSize( gridptr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridptr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridptr, dof );
+   maskPointer->bind( gridptr, maskDofs );
+   // Every mask entry is set to true; the expected values equal those of the WithoutMask test.
+   setDof_1D( dof, -rank-1 );
+   maskDofs.setValue( true );
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr, constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
    if( rank == 0 )
       EXPECT_EQ( dof[ 1 ], -nproc ) << "Left Overlap was filled by wrong process.";
    if( rank == nproc-1 )
       EXPECT_EQ( dof[ dof.getSize() - 2 ], -1 )<< "Right Overlap was filled by wrong process.";
 }
 
+TEST_F(DistributedGridTest_1D, SynchronizePeriodicNeighborsWithInactiveMaskOnLeft )
+{
+   // Set up periodic boundaries: recompute the overlaps, rebuild the local grid
+   // and rebind the mesh function and the mask. TODO: find a cleaner way with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridptr);
+   dof.setSize( gridptr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridptr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridptr, dof );
+   maskPointer->bind( gridptr, maskDofs );
+   // DOFs start at -rank-1; the mask is true everywhere except index 1.
+   setDof_1D( dof, -rank-1 );
+   maskDofs.setValue( true );
+   maskDofs.setElement( 1, false );
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Rank 0 expects 0 at index 1 (mask off there); the last rank expects -1 at index size-2.
+   if( rank == 0 )
+      EXPECT_EQ( dof[ 1 ], 0 ) << "Left Overlap was filled by wrong process.";
+   if( rank == nproc-1 )
+      EXPECT_EQ( dof[ dof.getSize() - 2 ], -1 )<< "Right Overlap was filled by wrong process.";
+}
+
+TEST_F(DistributedGridTest_1D, SynchronizePeriodicNeighborsWithInactiveMask )
+{
+   // Set up periodic boundaries: recompute the overlaps, rebuild the local grid
+   // and rebind the mesh function and the mask. TODO: find a cleaner way with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridptr);
+   dof.setSize( gridptr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridptr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridptr, dof );
+   maskPointer->bind( gridptr, maskDofs );
+   // DOFs start at -rank-1; the mask is true everywhere except index 1 and index size-2.
+   setDof_1D( dof, -rank-1 );
+   maskDofs.setValue( true );
+   maskDofs.setElement( 1, false );   
+   maskDofs.setElement( dof.getSize() - 2, false );
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Both checked cells have the mask off: rank 0 expects 0, the last rank expects nproc-1.
+   if( rank == 0 )
+      EXPECT_EQ( dof[ 1 ], 0 ) << "Left Overlap was filled by wrong process.";
+   if( rank == nproc-1 )
+      EXPECT_EQ( dof[ dof.getSize() - 2 ], nproc - 1 )<< "Right Overlap was filled by wrong process.";   
+   
+}
+
 TEST_F(DistributedGridTest_1D, SynchronizePeriodicBoundariesLinearTest )
 {
    // Setup periodic boundaries
@@ -338,7 +423,7 @@ TEST(NoMPI, NoTest)
   };
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
@@ -351,14 +436,9 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv);
-    #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
     #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_2D.cpp b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_2D.cpp
index 5ebcd46c69d1a1aa422fb8c5ffb6e4bc35266c58..6075f721ad2ca3311b45757362e81db8eeda32be 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_2D.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_2D.cpp
@@ -15,6 +15,7 @@
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
 
 #include "../../Functions/Functions.h"
@@ -397,28 +398,34 @@ void check_Inner_2D(int rank, GridType grid, DofType dof, typename DofType::Real
 typedef MpiCommunicator CommunicatorType;
 typedef Grid<2,double,Host,int> GridType;
 typedef MeshFunction<GridType> MeshFunctionType;
+typedef MeshFunction< GridType, GridType::getMeshDimension(), bool > MaskType;
 typedef Vector<double,Host,int> DofType;
+typedef Vector< bool, Host, int > MaskDofType;
 typedef typename GridType::Cell Cell;
 typedef typename GridType::IndexType IndexType; 
 typedef typename GridType::PointType PointType; 
 typedef DistributedMesh<GridType> DistributedGridType;
 
-class DistributedGirdTest_2D : public ::testing::Test
+class DistributedGridTest_2D : public ::testing::Test
 {
     
    public:
+      
+      using CoordinatesType = typename GridType::CoordinatesType;
 
       DistributedGridType *distributedGrid;
       DofType *dof;
+      MaskDofType maskDofs;
 
-      SharedPointer<GridType> gridPtr;
-      SharedPointer<MeshFunctionType> meshFunctionPtr;
+      Pointers::SharedPointer<GridType> gridPtr;
+      Pointers::SharedPointer<MeshFunctionType> meshFunctionPtr;
+      Pointers::SharedPointer< MaskType > maskPointer;
 
       MeshFunctionEvaluator< MeshFunctionType, ConstFunction<double,2> > constFunctionEvaluator;
-      SharedPointer< ConstFunction<double,2>, Host > constFunctionPtr;
+      Pointers::SharedPointer< ConstFunction<double,2>, Host > constFunctionPtr;
 
       MeshFunctionEvaluator< MeshFunctionType, LinearFunction<double,2> > linearFunctionEvaluator;
-      SharedPointer< LinearFunction<double,2>, Host > linearFunctionPtr;
+      Pointers::SharedPointer< LinearFunction<double,2>, Host > linearFunctionPtr;
 
       int rank;
       int nproc;    
@@ -463,7 +470,7 @@ class DistributedGirdTest_2D : public ::testing::Test
       }
 };
 
-TEST_F(DistributedGirdTest_2D, evaluateAllEntities)
+TEST_F(DistributedGridTest_2D, evaluateAllEntities)
 {
    //Check Traversars
    //All entities, without overlap
@@ -475,7 +482,7 @@ TEST_F(DistributedGirdTest_2D, evaluateAllEntities)
    check_Inner_2D(rank, *gridPtr, *dof, rank);
 }
 
-TEST_F(DistributedGirdTest_2D, evaluateBoundaryEntities)
+TEST_F(DistributedGridTest_2D, evaluateBoundaryEntities)
 {
     //Boundary entities, without overlap
     setDof_2D(*dof,-1);
@@ -486,7 +493,7 @@ TEST_F(DistributedGirdTest_2D, evaluateBoundaryEntities)
     check_Inner_2D(rank, *gridPtr, *dof, -1);
 }
 
-TEST_F(DistributedGirdTest_2D, evaluateInteriorEntities)
+TEST_F(DistributedGridTest_2D, evaluateInteriorEntities)
 {
     //Inner entities, without overlap
     setDof_2D(*dof,-1);
@@ -496,7 +503,7 @@ TEST_F(DistributedGirdTest_2D, evaluateInteriorEntities)
     check_Inner_2D(rank, *gridPtr, *dof, rank);
 }    
 
-TEST_F(DistributedGirdTest_2D, LinearFunctionTest)
+TEST_F(DistributedGridTest_2D, LinearFunctionTest)
 {
     //fill meshfunction with linear function (physical center of cell corresponds with its coordinates in grid) 
     setDof_2D(*dof,-1);
@@ -512,7 +519,7 @@ TEST_F(DistributedGirdTest_2D, LinearFunctionTest)
     }
 }
 
-TEST_F(DistributedGirdTest_2D, SynchronizerNeighborTest )
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborTest )
 {
    //Expect 9 processes
    setDof_2D(*dof,-1);
@@ -598,7 +605,7 @@ TEST_F(DistributedGirdTest_2D, SynchronizerNeighborTest )
     }   
 }
 
-TEST_F(DistributedGirdTest_2D, SynchronizerNeighborPeriodicBoundariesTest )
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborPeriodicBoundariesWithoutMask )
 {
    // Setup periodic boundaries
    // TODO: I do not know how to do it better with GTEST - additional setup 
@@ -669,6 +676,416 @@ TEST_F(DistributedGirdTest_2D, SynchronizerNeighborPeriodicBoundariesTest )
    }
 }
 
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborPeriodicBoundariesWithActiveMask )
+{
+   // Set up periodic boundaries: recompute the subdomain overlaps, rebuild
+   // the local grid and rebind the mesh function and the mask to resized data.
+   // TODO: find a cleaner way to do this additional setup with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridPtr);
+   dof->setSize( gridPtr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridPtr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridPtr, *dof );
+   maskPointer->bind( gridPtr, maskDofs );
+   
+   // The rank checks below expect 9 processes (ranks 0-8; rank 4 is not checked).
+   setDof_2D(*dof, -rank-1 );
+   maskDofs.setValue( true );
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Per-rank boundary checks; SCOPED_TRACE labels each rank's checks in failure output.
+   if( rank == 0 )
+   {
+      SCOPED_TRACE( "Up Left" );
+      checkLeftBoundary( *gridPtr, *dof, false,  true, -3 );
+      checkUpBoundary(   *gridPtr, *dof, false,  true, -7 );
+   }
+    
+   if( rank == 1 )
+   {
+      SCOPED_TRACE( "Up Center" );
+      checkUpBoundary( *gridPtr, *dof, true, true, -8 );
+   }
+    
+   if( rank == 2 )
+   {
+      SCOPED_TRACE( "Up Right" );
+      checkRightBoundary( *gridPtr, *dof, false, true, -1 );
+      checkUpBoundary(    *gridPtr, *dof, true, false, -9 );
+   }
+    
+   if( rank == 3 )
+   {
+      SCOPED_TRACE( "Center Left" );
+      checkLeftBoundary( *gridPtr, *dof, true, true, -6 );
+   } 
+        
+   if( rank == 5 )
+   {
+      SCOPED_TRACE( "Center Right" );
+      checkRightBoundary( *gridPtr, *dof, true, true, -4 );
+   }
+    
+   if( rank == 6 )
+   {
+      SCOPED_TRACE( "Down Left" );
+      checkDownBoundary( *gridPtr, *dof, false,  true, -1 );
+      checkLeftBoundary( *gridPtr, *dof, true,  false,  -9 );
+   }
+    
+   if( rank == 7 )
+   {
+      SCOPED_TRACE( "Down Center" );
+      checkDownBoundary( *gridPtr, *dof, true, true, -2 );
+   }
+
+   if( rank == 8 )
+   {
+      SCOPED_TRACE( "Down Right" );
+      checkDownBoundary(  *gridPtr, *dof, true, false, -3 );
+      checkRightBoundary( *gridPtr, *dof, true, false, -7 );
+   }
+}
+
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborPeriodicBoundariesWithInactiveMaskOnLeft )
+{
+   // Set up periodic boundaries: recompute the subdomain overlaps, rebuild
+   // the local grid and rebind the mesh function and the mask to resized data.
+   // TODO: find a cleaner way to do this additional setup with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridPtr);
+   dof->setSize( gridPtr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridPtr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridPtr, *dof );
+   maskPointer->bind( gridPtr, maskDofs );
+   
+   // The rank checks below expect 9 processes (ranks 0-8; rank 4 is not checked).
+   setDof_2D(*dof, -rank-1 );
+   maskDofs.setValue( true );
+   if( distributedGrid->getNeighbors()[ Left ] == -1 ) // presumably -1 means no left neighbor - TODO confirm
+   {
+      for( IndexType i = 0; i < gridPtr->getDimensions().y(); i++ )
+      {
+         typename GridType::Cell cell( *gridPtr );
+         cell.getCoordinates() = CoordinatesType( 1, i );
+         cell.refresh();
+         maskPointer->getData().setElement( cell.getIndex(), false ); // switch the mask off for cell ( 1, i )
+      }
+   }
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Ranks 0, 3 and 6 expect their own rank number on the left boundary; other checks equal the active-mask test.
+   if( rank == 0 )
+   {
+      SCOPED_TRACE( "Up Left" );
+      checkLeftBoundary( *gridPtr, *dof, false,  true, 0 );
+      checkUpBoundary(   *gridPtr, *dof, false,  true, -7 );
+   }
+    
+   if( rank == 1 )
+   {
+      SCOPED_TRACE( "Up Center" );
+      checkUpBoundary( *gridPtr, *dof, true, true, -8 );
+   }
+    
+   if( rank == 2 )
+   {
+      SCOPED_TRACE( "Up Right" );
+      checkRightBoundary( *gridPtr, *dof, false, true, -1 );
+      checkUpBoundary(    *gridPtr, *dof, true, false, -9 );
+   }
+    
+   if( rank == 3 )
+   {
+      SCOPED_TRACE( "Center Left" );
+      checkLeftBoundary( *gridPtr, *dof, true, true, 3 );
+   } 
+        
+   if( rank == 5 )
+   {
+      SCOPED_TRACE( "Center Right" );
+      checkRightBoundary( *gridPtr, *dof, true, true, -4 );
+   }
+    
+   if( rank == 6 )
+   {
+      SCOPED_TRACE( "Down Left" );
+      checkDownBoundary( *gridPtr, *dof, false,  true, -1 );
+      checkLeftBoundary( *gridPtr, *dof, true,  false,  6 );
+   }
+    
+   if( rank == 7 )
+   {
+      SCOPED_TRACE( "Down Center" );
+      checkDownBoundary( *gridPtr, *dof, true, true, -2 );
+   }
+
+   if( rank == 8 )
+   {
+      SCOPED_TRACE( "Down Right" );
+      checkDownBoundary(  *gridPtr, *dof, true, false, -3 );
+      checkRightBoundary( *gridPtr, *dof, true, false, -7 );
+   }
+}
+
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborPeriodicBoundariesWithInActiveMaskOnRight )
+{
+   // Set up periodic boundaries: recompute the subdomain overlaps, rebuild
+   // the local grid and rebind the mesh function and the mask to resized data.
+   // TODO: find a cleaner way to do this additional setup with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridPtr);
+   dof->setSize( gridPtr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridPtr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridPtr, *dof );
+   maskPointer->bind( gridPtr, maskDofs );
+   
+   // The rank checks below expect 9 processes (ranks 0-8; rank 4 is not checked).
+   setDof_2D(*dof, -rank-1 );
+   maskDofs.setValue( true );
+   if( distributedGrid->getNeighbors()[ Right ] == -1 ) // presumably -1 means no right neighbor - TODO confirm
+   {
+      for( IndexType i = 0; i < gridPtr->getDimensions().y(); i++ )
+      {
+         typename GridType::Cell cell( *gridPtr );
+         cell.getCoordinates() = CoordinatesType( gridPtr->getDimensions().x() - 2, i );
+         cell.refresh();
+         maskPointer->getData().setElement( cell.getIndex(), false ); // switch the mask off for cell ( x-size - 2, i )
+      }
+   }
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Ranks 2, 5 and 8 expect their own rank number on the right boundary; other checks equal the active-mask test.
+   if( rank == 0 )
+   {
+      SCOPED_TRACE( "Up Left" );
+      checkLeftBoundary( *gridPtr, *dof, false,  true, -3 );
+      checkUpBoundary(   *gridPtr, *dof, false,  true, -7 );
+   }
+    
+   if( rank == 1 )
+   {
+      SCOPED_TRACE( "Up Center" );
+      checkUpBoundary( *gridPtr, *dof, true, true, -8 );
+   }
+    
+   if( rank == 2 )
+   {
+      SCOPED_TRACE( "Up Right" );
+      checkRightBoundary( *gridPtr, *dof, false, true, 2 );
+      checkUpBoundary(    *gridPtr, *dof, true, false, -9 );
+   }
+    
+   if( rank == 3 )
+   {
+      SCOPED_TRACE( "Center Left" );
+      checkLeftBoundary( *gridPtr, *dof, true, true, -6 );
+   } 
+        
+   if( rank == 5 )
+   {
+      SCOPED_TRACE( "Center Right" );
+      checkRightBoundary( *gridPtr, *dof, true, true, 5 );
+   }
+    
+   if( rank == 6 )
+   {
+      SCOPED_TRACE( "Down Left" );
+      checkDownBoundary( *gridPtr, *dof, false,  true, -1 );
+      checkLeftBoundary( *gridPtr, *dof, true,  false,  -9 );
+   }
+    
+   if( rank == 7 )
+   {
+      SCOPED_TRACE( "Down Center" );
+      checkDownBoundary( *gridPtr, *dof, true, true, -2 );
+   }
+
+   if( rank == 8 )
+   {
+      SCOPED_TRACE( "Down Right" );
+      checkDownBoundary(  *gridPtr, *dof, true, false, -3 );
+      checkRightBoundary( *gridPtr, *dof, true, false, 8 );
+   }
+}
+
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborPeriodicBoundariesWithInActiveMaskUp )
+{
+   // Set up periodic boundaries: recompute the subdomain overlaps, rebuild
+   // the local grid and rebind the mesh function and the mask to resized data.
+   // TODO: find a cleaner way to do this additional setup with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridPtr);
+   dof->setSize( gridPtr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridPtr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridPtr, *dof );
+   maskPointer->bind( gridPtr, maskDofs );
+   
+   // The rank checks below expect 9 processes (ranks 0-8; rank 4 is not checked).
+   setDof_2D(*dof, -rank-1 );
+   maskDofs.setValue( true );
+   if( distributedGrid->getNeighbors()[ Up ] == -1 ) // presumably -1 means no upper neighbor - TODO confirm
+   {
+      for( IndexType i = 0; i < gridPtr->getDimensions().x(); i++ )
+      {
+         typename GridType::Cell cell( *gridPtr );
+         cell.getCoordinates() = CoordinatesType( i, 1 );
+         cell.refresh();
+         maskPointer->getData().setElement( cell.getIndex(), false ); // switch the mask off for cell ( i, 1 )
+      }
+   }
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Ranks 0, 1 and 2 expect their own rank number on the up boundary; other checks equal the active-mask test.
+   if( rank == 0 )
+   {
+      SCOPED_TRACE( "Up Left" );
+      checkLeftBoundary( *gridPtr, *dof, false,  true, -3 );
+      checkUpBoundary(   *gridPtr, *dof, false,  true, 0 );
+   }
+    
+   if( rank == 1 )
+   {
+      SCOPED_TRACE( "Up Center" );
+      checkUpBoundary( *gridPtr, *dof, true, true, 1 );
+   }
+    
+   if( rank == 2 )
+   {
+      SCOPED_TRACE( "Up Right" );
+      checkRightBoundary( *gridPtr, *dof, false, true, -1 );
+      checkUpBoundary(    *gridPtr, *dof, true, false, 2 );
+   }
+    
+   if( rank == 3 )
+   {
+      SCOPED_TRACE( "Center Left" );
+      checkLeftBoundary( *gridPtr, *dof, true, true, -6 );
+   } 
+        
+   if( rank == 5 )
+   {
+      SCOPED_TRACE( "Center Right" );
+      checkRightBoundary( *gridPtr, *dof, true, true, -4 );
+   }
+    
+   if( rank == 6 )
+   {
+      SCOPED_TRACE( "Down Left" );
+      checkDownBoundary( *gridPtr, *dof, false,  true, -1 );
+      checkLeftBoundary( *gridPtr, *dof, true,  false,  -9 );
+   }
+    
+   if( rank == 7 )
+   {
+      SCOPED_TRACE( "Down Center" );
+      checkDownBoundary( *gridPtr, *dof, true, true, -2 );
+   }
+
+   if( rank == 8 )
+   {
+      SCOPED_TRACE( "Down Right" );
+      checkDownBoundary(  *gridPtr, *dof, true, false, -3 );
+      checkRightBoundary( *gridPtr, *dof, true, false, -7 );
+   }
+}
+
+TEST_F(DistributedGridTest_2D, SynchronizerNeighborPeriodicBoundariesWithInActiveMaskDown )
+{
+   // Set up periodic boundaries: recompute the subdomain overlaps, rebuild
+   // the local grid and rebind the mesh function and the mask to resized data.
+   // TODO: find a cleaner way to do this additional setup with GTEST.
+   typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
+   SubdomainOverlapsGetter< GridType, CommunicatorType >::
+      getOverlaps( distributedGrid, lowerOverlap, upperOverlap, 1, 1 );
+   distributedGrid->setOverlaps( lowerOverlap, upperOverlap );
+   distributedGrid->setupGrid(*gridPtr);
+   dof->setSize( gridPtr->template getEntitiesCount< Cell >() );
+   maskDofs.setSize( gridPtr->template getEntitiesCount< Cell >() );
+   meshFunctionPtr->bind( gridPtr, *dof );
+   maskPointer->bind( gridPtr, maskDofs );
+   
+   // The rank checks below expect 9 processes (ranks 0-8; rank 4 is not checked).
+   setDof_2D(*dof, -rank-1 );
+   maskDofs.setValue( true );
+   if( distributedGrid->getNeighbors()[ Down ] == -1 ) // presumably -1 means no lower neighbor - TODO confirm
+   {
+      for( IndexType i = 0; i < gridPtr->getDimensions().x(); i++ )
+      {
+         typename GridType::Cell cell( *gridPtr );
+         cell.getCoordinates() = CoordinatesType( i, gridPtr->getDimensions().y() - 2 );
+         cell.refresh();
+         maskPointer->getData().setElement( cell.getIndex(), false ); // switch the mask off for cell ( i, y-size - 2 )
+      }
+   }
+   constFunctionEvaluator.evaluateAllEntities( meshFunctionPtr , constFunctionPtr );
+   meshFunctionPtr->template synchronize<CommunicatorType>( true, maskPointer );
+   // Ranks 6, 7 and 8 expect their own rank number on the down boundary; other checks equal the active-mask test.
+   if( rank == 0 )
+   {
+      SCOPED_TRACE( "Up Left" );
+      checkLeftBoundary( *gridPtr, *dof, false,  true, -3 );
+      checkUpBoundary(   *gridPtr, *dof, false,  true, -7 );
+   }
+    
+   if( rank == 1 )
+   {
+      SCOPED_TRACE( "Up Center" );
+      checkUpBoundary( *gridPtr, *dof, true, true, -8 );
+   }
+    
+   if( rank == 2 )
+   {
+      SCOPED_TRACE( "Up Right" );
+      checkRightBoundary( *gridPtr, *dof, false, true, -1 );
+      checkUpBoundary(    *gridPtr, *dof, true, false, -9 );
+   }
+    
+   if( rank == 3 )
+   {
+      SCOPED_TRACE( "Center Left" );
+      checkLeftBoundary( *gridPtr, *dof, true, true, -6 );
+   } 
+        
+   if( rank == 5 )
+   {
+      SCOPED_TRACE( "Center Right" );
+      checkRightBoundary( *gridPtr, *dof, true, true, -4 );
+   }
+    
+   if( rank == 6 )
+   {
+      SCOPED_TRACE( "Down Left" );
+      checkDownBoundary( *gridPtr, *dof, false,  true, 6 );
+      checkLeftBoundary( *gridPtr, *dof, true,  false,  -9 );
+   }
+    
+   if( rank == 7 )
+   {
+      SCOPED_TRACE( "Down Center" );
+      checkDownBoundary( *gridPtr, *dof, true, true, 7 );
+   }
+
+   if( rank == 8 )
+   {
+      SCOPED_TRACE( "Down Right" );
+      checkDownBoundary(  *gridPtr, *dof, true, false, 8 );
+      checkRightBoundary( *gridPtr, *dof, true, false, -7 );
+   }
+}
+ 
 #else
 TEST(NoMPI, NoTest)
 {
@@ -715,7 +1132,7 @@ TEST(NoMPI, NoTest)
   };
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
@@ -728,14 +1145,9 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv);
-    #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
     #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_3D.cpp b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_3D.cpp
index f3e77cccadd1f70e824a983185255baab12f9a8f..6bbd7ad257a176f8f10f2dcefcd5315b385307ce 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_3D.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridTest_3D.cpp
@@ -4,6 +4,7 @@
 #ifdef HAVE_MPI    
 
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
@@ -608,14 +609,14 @@ class DistributedGirdTest_3D : public ::testing::Test
       DistributedGridType *distributedGrid;
       DofType *dof;
 
-      SharedPointer<GridType> gridptr;
-      SharedPointer<MeshFunctionType> meshFunctionptr;
+      Pointers::SharedPointer<GridType> gridptr;
+      Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
 
       MeshFunctionEvaluator< MeshFunctionType, ConstFunction<double,3> > constFunctionEvaluator;
-      SharedPointer< ConstFunction<double,3>, Host > constFunctionPtr;
+      Pointers::SharedPointer< ConstFunction<double,3>, Host > constFunctionPtr;
 
       MeshFunctionEvaluator< MeshFunctionType, LinearFunction<double,3> > linearFunctionEvaluator;
-      SharedPointer< LinearFunction<double,3>, Host > linearFunctionPtr;
+      Pointers::SharedPointer< LinearFunction<double,3>, Host > linearFunctionPtr;
 
       int rank;
       int nproc;    
@@ -765,7 +766,7 @@ TEST(NoMPI, NoTest)
   };
 #endif
 
-#include "../../src/UnitTests/GtestMissingError.h"
+#include "../../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
@@ -778,14 +779,9 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv);
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
     #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
-    #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTest.cpp
index d3a1cd55267ad569ecfcf2196a8922a0384f5e36..67098fc5db6c801410425378ab5b609778b376f3 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTest.cpp
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTest.cpp
@@ -6,6 +6,7 @@
 #ifdef HAVE_MPI
 
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include "DistributedVectorFieldIO_MPIIOTestBase.h"
 
 using namespace TNL::Communicators;
@@ -102,16 +103,11 @@ int main( int argc, char* argv[] )
        delete listeners.Release(listeners.default_result_printer());
        listeners.Append(new MinimalistBufferedPrinter);
 
-       CommunicatorType::Init(argc,argv );
+       Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
        CommunicatorType::setRedirection( false );
        CommunicatorType::setupRedirection();
     #endif
-       int result= RUN_ALL_TESTS();
-
-    #ifdef HAVE_MPI
-       CommunicatorType::Finalize();
-    #endif
-       return result;
+       return RUN_ALL_TESTS();
 #else
    
    throw GtestMissingError();
diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTestBase.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTestBase.h
index d839dbc3a2d3df02622f3c25d8b62fa349b2b72a..e668847832825318d7da057e2909ba350fbcd347 100644
--- a/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTestBase.h
+++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedVectorFieldIO_MPIIOTestBase.h
@@ -41,7 +41,7 @@ class TestDistributedVectorFieldMPIIO{
 
     static void TestSave()
     {
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
         
         //save distributed meshfunction into file
@@ -51,14 +51,14 @@ class TestDistributedVectorFieldMPIIO{
         PointType globalProportions;
         globalProportions.setValue(10);
 
-        SharedPointer<MeshType> globalGrid;
+        Pointers::SharedPointer<MeshType> globalGrid;
         globalGrid->setDimensions(globalProportions);
         globalGrid->setDomain(globalOrigin,globalProportions);
         
         DistributedGridType distributedGrid;
         distributedGrid.template setGlobalGrid<CommunicatorType>( *globalGrid );
 
-        SharedPointer<MeshType> gridptr;        
+        Pointers::SharedPointer<MeshType> gridptr;        
         distributedGrid.setupGrid(*gridptr);
         typename DistributedGridType::SubdomainOverlapsType lowerOverlap, upperOverlap;
         SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
@@ -117,7 +117,7 @@ class TestDistributedVectorFieldMPIIO{
     
     static void TestLoad()
     {
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
 
         //Crete distributed grid            
@@ -127,7 +127,7 @@ class TestDistributedVectorFieldMPIIO{
         PointType globalProportions;
         globalProportions.setValue(50);
 
-        SharedPointer<MeshType> globalGrid;
+        Pointers::SharedPointer<MeshType> globalGrid;
         globalGrid->setDimensions(globalProportions);
         globalGrid->setDomain(globalOrigin,globalProportions);
 
@@ -158,7 +158,7 @@ class TestDistributedVectorFieldMPIIO{
             file.close();
         }
 
-        SharedPointer<MeshType> loadGridptr;
+        Pointers::SharedPointer<MeshType> loadGridptr;
         VectorFieldType loadVectorField;
         distributedGrid.setupGrid(*loadGridptr);
         
@@ -171,7 +171,7 @@ class TestDistributedVectorFieldMPIIO{
         for(int i=0;i<vctdim;i++)
             (loadVectorField[i])->template synchronize<CommunicatorType>(); //need synchronization for overlaps to be filled corectly in loadDof
 
-        SharedPointer<MeshType> evalGridPtr;
+        Pointers::SharedPointer<MeshType> evalGridPtr;
         VectorFieldType evalVectorField;
         distributedGrid.setupGrid(*evalGridPtr);
         
diff --git a/src/UnitTests/Mpi/CMakeLists.txt b/src/UnitTests/Mpi/CMakeLists.txt
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/UnitTests/Pointers/CMakeLists.txt b/src/UnitTests/Pointers/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bd727636361f20d3516b26a81b1e3f48d9beeaaf
--- /dev/null
+++ b/src/UnitTests/Pointers/CMakeLists.txt
@@ -0,0 +1,23 @@
+ADD_EXECUTABLE( UniquePointerTest UniquePointerTest.cpp )
+TARGET_COMPILE_OPTIONS( UniquePointerTest PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( UniquePointerTest
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl )
+ADD_TEST( UniquePointerTest ${EXECUTABLE_OUTPUT_PATH}/UniquePointerTest${CMAKE_EXECUTABLE_SUFFIX} )
+
+
+ADD_EXECUTABLE( SharedPointerHostTest SharedPointerHostTest.cpp )
+TARGET_COMPILE_OPTIONS( SharedPointerHostTest PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( SharedPointerHostTest
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl )
+ADD_TEST( SharedPointerHostTest ${EXECUTABLE_OUTPUT_PATH}/SharedPointerHostTest${CMAKE_EXECUTABLE_SUFFIX} )
+
+if( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( SharedPointerCudaTest SharedPointerCudaTest.cu 
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SharedPointerCudaTest
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl )
+   ADD_TEST( SharedPointerCudaTest ${EXECUTABLE_OUTPUT_PATH}/SharedPointerCudaTest${CMAKE_EXECUTABLE_SUFFIX} )
+endif( BUILD_CUDA )
diff --git a/src/UnitTests/Pointers/SharedPointerCudaTest.cu b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..813054c140fc9ec7aa07784adf596670d5d17c40
--- /dev/null
+++ b/src/UnitTests/Pointers/SharedPointerCudaTest.cu
@@ -0,0 +1,148 @@
+/***************************************************************************
+                          SharedPointerCudaTest.cu  -  description
+                             -------------------
+    begin                : Aug 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <cstdlib>
+#include <TNL/Devices/Host.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Containers/StaticArray.h>
+#include <TNL/Containers/Array.h>
+
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+#endif
+
+#include <TNL/Devices/Cuda.h>
+#include "../GtestMissingError.h"
+
+using namespace TNL;
+
+#ifdef HAVE_GTEST
+TEST( SharedPointerCudaTest, ConstructorTest )
+{
+#ifdef HAVE_CUDA
+   typedef TNL::Containers::StaticArray< 2, int  > TestType;
+   Pointers::SharedPointer< TestType, Devices::Cuda > ptr1;
+
+   ptr1->x() = 0;
+   ptr1->y() = 0;
+   ASSERT_EQ( ptr1->x(), 0 );
+   ASSERT_EQ( ptr1->y(), 0 );
+
+   Pointers::SharedPointer< TestType, Devices::Cuda > ptr2( 1, 2 );
+   ASSERT_EQ( ptr2->x(), 1 );
+   ASSERT_EQ( ptr2->y(), 2 );
+
+   ptr1 = ptr2;
+   ASSERT_EQ( ptr1->x(), 1 );
+   ASSERT_EQ( ptr1->y(), 2 );
+#endif
+};
+
+TEST( SharedPointerCudaTest, getDataTest )
+{
+#ifdef HAVE_CUDA
+   typedef TNL::Containers::StaticArray< 2, int  > TestType;
+   Pointers::SharedPointer< TestType, Devices::Cuda > ptr1( 1, 2 );
+   // With HAVE_CUDA_UNIFIED_MEMORY the values are checked directly through the pointer.
+#ifdef HAVE_CUDA_UNIFIED_MEMORY
+   ASSERT_EQ( ptr1->x(), 1 );
+   ASSERT_EQ( ptr1->y(), 2 );
+#else
+   // Otherwise the object returned by getData< Devices::Cuda >() is copied to the host first.
+   Devices::Cuda::synchronizeDevice();
+   
+   TestType aux;
+   // NOTE(review): the return value of cudaMemcpy is not checked here.
+   cudaMemcpy( ( void*) &aux, &ptr1.getData< Devices::Cuda >(), sizeof( TestType ), cudaMemcpyDeviceToHost );
+   
+   ASSERT_EQ( aux[ 0 ], 1 );
+   ASSERT_EQ( aux[ 1 ], 2 );
+#endif  // HAVE_CUDA_UNIFIED_MEMORY
+#endif  // HAVE_CUDA
+};
+
+#ifdef HAVE_CUDA
+__global__ void copyArrayKernel( const TNL::Containers::Array< int, Devices::Cuda >* inArray,
+                                 int* outArray )
+{
+   if( threadIdx.x < 2 )
+   {
+      outArray[ threadIdx.x ] = ( *inArray )[ threadIdx.x ];
+   }
+}
+
+#endif
+
+TEST( SharedPointerCudaTest, getDataArrayTest )
+{
+#ifdef HAVE_CUDA
+   typedef TNL::Containers::Array< int, Devices::Cuda  > TestType;
+   Pointers::SharedPointer< TestType > ptr;
+   // Fill a two-element array and read it back through a kernel that takes the getData< Devices::Cuda >() object.
+   ptr->setSize( 2 );
+   ptr->setElement( 0, 1 );
+   ptr->setElement( 1, 2 );
+
+   Devices::Cuda::synchronizeDevice();
+
+   int *testArray_device, *testArray_host;
+   ASSERT_EQ( cudaMalloc( ( void** ) &testArray_device, 2 * sizeof( int ) ), cudaSuccess );
+   copyArrayKernel<<< 1, 2 >>>( &ptr.getData< Devices::Cuda >(), testArray_device );
+   testArray_host = new int [ 2 ];
+   cudaMemcpy( testArray_host, testArray_device, 2 * sizeof( int ), cudaMemcpyDeviceToHost );
+   // Non-fatal EXPECT_* so that a failed check still reaches the cleanup below (no leaked buffers).
+   EXPECT_EQ( testArray_host[ 0 ], 1 );
+   EXPECT_EQ( testArray_host[ 1 ], 2 );
+   
+   delete[] testArray_host;
+   cudaFree( testArray_device );
+
+#endif
+};
+
+TEST( SharedPointerCudaTest, nullptrAssignement )
+{
+#ifdef HAVE_CUDA
+   using TestType = Pointers::SharedPointer< double, Devices::Cuda >;
+   TestType p1( 5 ), p2( nullptr );
+   
+   // This should not crash
+   p1 = p2;
+   
+   ASSERT_FALSE( p1 );
+   ASSERT_FALSE( p2 );
+#endif
+}
+
+TEST( SharedPointerCudaTest, swap )
+{
+#ifdef HAVE_CUDA
+   using TestType = Pointers::SharedPointer< double, Devices::Cuda >;
+   TestType p1( 1 ), p2( 2 );
+   
+   p1.swap( p2 );
+   
+   ASSERT_EQ( *p1, 2 );
+   ASSERT_EQ( *p2, 1 );
+#endif
+}
+
+
+#endif
+
+int main( int argc, char* argv[] )
+{
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+#else
+   throw GtestMissingError();
+#endif
+}
diff --git a/src/UnitTests/Pointers/SharedPointerHostTest.cpp b/src/UnitTests/Pointers/SharedPointerHostTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7d441f70e8e714cd911ae27d8e3a6a92204277e
--- /dev/null
+++ b/src/UnitTests/Pointers/SharedPointerHostTest.cpp
@@ -0,0 +1,76 @@
+/***************************************************************************
+                          SharedPointerHostTest.cpp  -  description
+                             -------------------
+    begin                : Aug 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <cstdlib>
+#include <TNL/Devices/Host.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Containers/StaticArray.h>
+
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+#endif
+
+using namespace TNL;
+
+#ifdef HAVE_GTEST 
+TEST( SharedPointerHostTest, ConstructorTest )
+{
+   typedef TNL::Containers::StaticArray< 2, int  > TestType;
+   Pointers::SharedPointer< TestType, Devices::Host > ptr1;
+
+   ptr1->x() = 0;
+   ptr1->y() = 0;
+   ASSERT_EQ( ptr1->x(), 0 );
+   ASSERT_EQ( ptr1->y(), 0 );
+
+   Pointers::SharedPointer< TestType, Devices::Host > ptr2( 1, 2 );
+   ASSERT_EQ( ptr2->x(), 1 );
+   ASSERT_EQ( ptr2->y(), 2 );
+
+   ptr1 = ptr2;
+   ASSERT_EQ( ptr1->x(), 1 );
+   ASSERT_EQ( ptr1->y(), 2 );
+};
+
+TEST( SharedPointerHostTest, nullptrAssignement )
+{
+   using TestType = Pointers::SharedPointer< double, Devices::Host >;
+   TestType p1( 5 ), p2( nullptr );
+
+   // Assigning the nullptr-constructed p2 over p1 (constructed with 5) must not
+   // crash, and afterwards both pointers must evaluate to false.
+   p1 = p2;
+   ASSERT_FALSE( p1 );
+   ASSERT_FALSE( p2 );
+}
+
+TEST( SharedPointerHostTest, swap )
+{
+   using TestType = Pointers::SharedPointer< double, Devices::Host >;
+   TestType p1( 1 ), p2( 2 );
+   // After the swap each pointer must dereference to the other's initial value.
+   p1.swap( p2 );
+
+   ASSERT_EQ( *p1, 2 );
+   ASSERT_EQ( *p2, 1 );
+}
+
+#endif
+
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )
+{
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+#else
+   throw GtestMissingError();
+#endif
+}
diff --git a/src/UnitTests/UniquePointerTest.cpp b/src/UnitTests/Pointers/UniquePointerTest.cpp
similarity index 64%
rename from src/UnitTests/UniquePointerTest.cpp
rename to src/UnitTests/Pointers/UniquePointerTest.cpp
index 677b3e2bb98a07508ec059b2e6a350375785c9b1..2fba352f588ed01a9369598f8cdfcd8506dc6a97 100644
--- a/src/UnitTests/UniquePointerTest.cpp
+++ b/src/UnitTests/Pointers/UniquePointerTest.cpp
@@ -1,12 +1,3 @@
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
 /***************************************************************************
                           UniquePointerTest.cpp  -  description
                              -------------------
@@ -15,9 +6,11 @@
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
+/* See Copyright Notice in tnl/Copyright */
+
 #include <cstdlib>
 #include <TNL/Devices/Host.h>
-#include <TNL/UniquePointer.h>
+#include <TNL/Pointers/UniquePointer.h>
 #include <TNL/Containers/StaticArray.h>
 
 #ifdef HAVE_GTEST 
@@ -25,6 +18,7 @@
 #endif
 
 using namespace TNL;
+using namespace TNL::Pointers;
 
 #ifdef HAVE_GTEST 
 TEST( UniquePointerTest, ConstructorTest )
@@ -47,7 +41,7 @@ TEST( UniquePointerTest, ConstructorTest )
 };
 #endif
 
-#include "GtestMissingError.h"
+#include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_GTEST
diff --git a/src/UnitTests/SaveAndLoadMeshfunctionTest.cpp b/src/UnitTests/SaveAndLoadMeshfunctionTest.cpp
index 62c181cee49d4187ad42076410792663df701250..f4b0cc7e6467ded039417d15b60b2f99fdb55335 100644
--- a/src/UnitTests/SaveAndLoadMeshfunctionTest.cpp
+++ b/src/UnitTests/SaveAndLoadMeshfunctionTest.cpp
@@ -42,7 +42,7 @@ class TestSaveAndLoadMeshfunction
             typedef typename MeshType::CoordinatesType CoordinatesType;
             typedef LinearFunction<double,dim> LinearFunctionType;
 
-            SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+            Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
             MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
 
 
@@ -51,13 +51,13 @@ class TestSaveAndLoadMeshfunction
             PointType localProportions;
             localProportions.setValue(10);
             
-            SharedPointer<MeshType>  localGridptr;
+            Pointers::SharedPointer<MeshType>  localGridptr;
             localGridptr->setDimensions(localProportions);
             localGridptr->setDomain(localOrigin,localProportions);
 
             DofType localDof(localGridptr->template getEntitiesCount< Cell >());
 
-            SharedPointer<MeshFunctionType> localMeshFunctionptr;
+            Pointers::SharedPointer<MeshFunctionType> localMeshFunctionptr;
             localMeshFunctionptr->bind(localGridptr,localDof);
             linearFunctionEvaluator.evaluateAllEntities(localMeshFunctionptr , linearFunctionPtr);
 
@@ -67,12 +67,12 @@ class TestSaveAndLoadMeshfunction
             ASSERT_TRUE( file.close() );
 
             //load other meshfunction on same localgrid from created file
-            SharedPointer<MeshType>  loadGridptr;
+            Pointers::SharedPointer<MeshType>  loadGridptr;
             loadGridptr->setDimensions(localProportions);
             loadGridptr->setDomain(localOrigin,localProportions);
 
             DofType loadDof(loadGridptr->template getEntitiesCount< Cell >());
-            SharedPointer<MeshFunctionType> loadMeshFunctionptr;
+            Pointers::SharedPointer<MeshFunctionType> loadMeshFunctionptr;
             loadMeshFunctionptr->bind(loadGridptr,loadDof);
 
             for(int i=0;i<loadDof.getSize();i++)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index ad92941eee33fd59cadfbb96b4f6f76f14821b14..93c15ce593a9db00fc240c6263706f9c10a46ac5 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,10 +1,4 @@
-set( ENABLE_CODECOVERAGE )
-
 ADD_SUBDIRECTORY( data )
-ADD_SUBDIRECTORY( benchmarks )
 #ADD_SUBDIRECTORY( unit-tests )
 ADD_SUBDIRECTORY( long-time-unit-tests )
 ADD_SUBDIRECTORY( mpi )
-ADD_SUBDIRECTORY( simple_examples )
-
-unset( ENABLE_CODECOVERAGE )
diff --git a/tests/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt
deleted file mode 100644
index e53ba6878070e6b8f8d3b830ab8f64afcccbcf77..0000000000000000000000000000000000000000
--- a/tests/benchmarks/CMakeLists.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-ADD_SUBDIRECTORY( share )
-ADD_SUBDIRECTORY( heat-equation-benchmark )
-
-IF( BUILD_CUDA )
-    CUDA_ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cu )
-    CUDA_ADD_CUBLAS_TO_TARGET( tnl-benchmark-blas )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-blas tnl )
-
-    CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv tnl ${CUDA_cusparse_LIBRARY} )
-
-    CUDA_ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cu )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
-ELSE()
-    ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cpp )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-blas tnl )
-
-    ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv tnl )
-
-    ADD_EXECUTABLE( tnl-benchmark-linear-solvers tnl-benchmark-linear-solvers.cpp )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-linear-solvers tnl )
-ENDIF()
-
-INSTALL( TARGETS
-            tnl-benchmark-blas
-            tnl-benchmark-spmv
-            tnl-benchmark-linear-solvers
-         RUNTIME DESTINATION bin )
diff --git a/tests/benchmarks/array-operations.h b/tests/benchmarks/array-operations.h
deleted file mode 100644
index 504dcc1da03a91fa87af913008f9355579e62930..0000000000000000000000000000000000000000
--- a/tests/benchmarks/array-operations.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/***************************************************************************
-                          array-operations.h  -  description
-                             -------------------
-    begin                : Dec 30, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include "benchmarks.h"
-
-#include <TNL/Containers/Array.h>
-
-namespace TNL
-{
-namespace benchmarks
-{
-
-template< typename Real = double,
-          typename Index = int >
-bool
-benchmarkArrayOperations( Benchmark & benchmark,
-                          const int & loops,
-                          const long & size )
-{
-    typedef Containers::Array< Real, Devices::Host, Index > HostArray;
-    typedef Containers::Array< Real, Devices::Cuda, Index > CudaArray;
-    using namespace std;
-
-    double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
-
-    HostArray hostArray, hostArray2;
-    CudaArray deviceArray, deviceArray2;
-    hostArray.setSize( size );
-    hostArray2.setSize( size );
-#ifdef HAVE_CUDA
-    deviceArray.setSize( size );
-    deviceArray2.setSize( size );
-#endif
-
-    Real resultHost, resultDevice;
-
-
-    // reset functions
-    auto reset1 = [&]() {
-        hostArray.setValue( 1.0 );
-#ifdef HAVE_CUDA
-        deviceArray.setValue( 1.0 );
-#endif
-    };
-    auto reset2 = [&]() {
-        hostArray2.setValue( 1.0 );
-#ifdef HAVE_CUDA
-        deviceArray2.setValue( 1.0 );
-#endif
-    };
-    auto reset12 = [&]() {
-        reset1();
-        reset2();
-    };
-
-
-    reset12();
-
-
-    auto compareHost = [&]() {
-        resultHost = (int) hostArray == hostArray2;
-    };
-    auto compareCuda = [&]() {
-        resultDevice = (int) deviceArray == deviceArray2;
-    };
-    benchmark.setOperation( "comparison (operator==)", 2 * datasetSize );
-    benchmark.time( reset1, "CPU", compareHost );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", compareCuda );
-#endif
-
-
-    auto copyAssignHostHost = [&]() {
-        hostArray = hostArray2;
-    };
-    auto copyAssignCudaCuda = [&]() {
-        deviceArray = deviceArray2;
-    };
-    benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
-    // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will
-    // complain when compiling without CUDA
-    const double copyBasetime = benchmark.time( reset1, "CPU", copyAssignHostHost );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", copyAssignCudaCuda );
-#endif
-
-
-    auto copyAssignHostCuda = [&]() {
-        deviceArray = hostArray;
-    };
-    auto copyAssignCudaHost = [&]() {
-        hostArray = deviceArray;
-    };
-#ifdef HAVE_CUDA
-    benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime );
-    benchmark.time( reset1,
-                    "CPU->GPU", copyAssignHostCuda,
-                    "GPU->CPU", copyAssignCudaHost );
-#endif
-
-
-    auto setValueHost = [&]() {
-        hostArray.setValue( 3.0 );
-    };
-    auto setValueCuda = [&]() {
-        deviceArray.setValue( 3.0 );
-    };
-    benchmark.setOperation( "setValue", datasetSize );
-    benchmark.time( reset1, "CPU", setValueHost );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", setValueCuda );
-#endif
-
-
-    auto setSizeHost = [&]() {
-        hostArray.setSize( size );
-    };
-    auto setSizeCuda = [&]() {
-        deviceArray.setSize( size );
-    };
-    auto resetSize1 = [&]() {
-        hostArray.reset();
-#ifdef HAVE_CUDA
-        deviceArray.reset();
-#endif
-    };
-    benchmark.setOperation( "allocation (setSize)", datasetSize );
-    benchmark.time( resetSize1, "CPU", setSizeHost );
-#ifdef HAVE_CUDA
-    benchmark.time( resetSize1, "GPU", setSizeCuda );
-#endif
-
-
-    auto resetSizeHost = [&]() {
-        hostArray.reset();
-    };
-    auto resetSizeCuda = [&]() {
-        deviceArray.reset();
-    };
-    auto setSize1 = [&]() {
-        hostArray.setSize( size );
-#ifdef HAVE_CUDA
-        deviceArray.setSize( size );
-#endif
-    };
-    benchmark.setOperation( "deallocation (reset)", datasetSize );
-    benchmark.time( setSize1, "CPU", resetSizeHost );
-#ifdef HAVE_CUDA
-    benchmark.time( setSize1, "GPU", resetSizeCuda );
-#endif
-
-    return true;
-}
-
-} // namespace benchmarks
-} // namespace tnl
diff --git a/tests/benchmarks/benchmarks.h b/tests/benchmarks/benchmarks.h
deleted file mode 100644
index ce5e631a6899170cfeba58911be71e5cc17eb7e6..0000000000000000000000000000000000000000
--- a/tests/benchmarks/benchmarks.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/***************************************************************************
-                          benchmarks.h  -  description
-                             -------------------
-    begin                : Dec 30, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <iostream>
-#include <iomanip>
-#include <map>
-#include <vector>
-
-#include <TNL/Timer.h>
-#include <TNL/String.h>
-
-namespace TNL
-{
-namespace benchmarks
-{
-
-const double oneGB = 1024.0 * 1024.0 * 1024.0;
-
-template< typename ComputeFunction,
-          typename ResetFunction >
-double
-timeFunction( ComputeFunction compute,
-              ResetFunction reset,
-              const int & loops )
-{
-    // the timer is constructed zero-initialized and stopped
-    Timer timer;
-
-    reset();
-    for(int i = 0; i < loops; ++i) {
-        // Explicit synchronization of the CUDA device
-        // TODO: not necessary for host computations
-#ifdef HAVE_CUDA
-        cudaDeviceSynchronize();
-#endif
-        timer.start();
-        compute();
-#ifdef HAVE_CUDA
-        cudaDeviceSynchronize();
-#endif
-        timer.stop();
-
-        reset();
-    }
-
-    return timer.getRealTime();
-}
-
-
-struct InternalError {};
-
-
-class Logging
-{
-public:
-    using MetadataElement = std::pair< const char*, String >;
-    using MetadataMap = std::map< const char*, String >;
-    using MetadataColumns = std::vector<MetadataElement>;
-
-    using HeaderElements = std::initializer_list< String >;
-    using RowElements = std::initializer_list< double >;
-
-    Logging( bool verbose = true )
-        : verbose(verbose)
-    { }
-
-    void
-    writeTitle( const String & title )
-    {
-        if( verbose )
-            std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
-        log << ": title = " << title << std::endl;
-    }
-
-    void
-    writeMetadata( const MetadataMap & metadata )
-    {
-        if( verbose )
-            std::cout << "properties:" << std::endl;
-
-        for( auto & it : metadata ) {
-            if( verbose )
-                std::cout << "   " << it.first << " = " << it.second << std::endl;
-            log << ": " << it.first << " = " << it.second << std::endl;
-        }
-        if( verbose )
-            std::cout << std::endl;
-    }
-
-    void
-    writeTableHeader( const String & spanningElement,
-                      const HeaderElements & subElements )
-    {
-        using namespace std;
-
-        if( verbose && header_changed ) {
-            for( auto & it : metadataColumns ) {
-               std::cout << std::setw( 20 ) << it.first;
-            }
-
-            // spanning element is printed as usual column to stdout,
-            // but is excluded from header
-           std::cout << std::setw( 15 ) << "";
-
-            for( auto & it : subElements ) {
-               std::cout << std::setw( 15 ) << it;
-            }
-           std::cout << std::endl;
-
-            header_changed = false;
-        }
-
-        // initial indent string
-        header_indent = "!";
-        log << std::endl;
-        for( auto & it : metadataColumns ) {
-            log << header_indent << " " << it.first << std::endl;
-        }
-
-        // dump stacked spanning columns
-        if( horizontalGroups.size() > 0 )
-            while( horizontalGroups.back().second <= 0 ) {
-                horizontalGroups.pop_back();
-                header_indent.pop_back();
-            }
-        for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
-            if( horizontalGroups[ i ].second > 0 ) {
-                log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
-                header_indent += "!";
-            }
-        }
-
-        log << header_indent << " " << spanningElement << std::endl;
-        for( auto & it : subElements ) {
-            log << header_indent << "! " << it << std::endl;
-        }
-
-        if( horizontalGroups.size() > 0 ) {
-            horizontalGroups.back().second--;
-            header_indent.pop_back();
-        }
-    }
-
-    void
-    writeTableRow( const String & spanningElement,
-                   const RowElements & subElements )
-    {
-        using namespace std;
-
-        if( verbose ) {
-            for( auto & it : metadataColumns ) {
-               std::cout << std::setw( 20 ) << it.second;
-            }
-            // spanning element is printed as usual column to stdout
-           std::cout << std::setw( 15 ) << spanningElement;
-            for( auto & it : subElements ) {
-               std::cout << std::setw( 15 );
-                if( it != 0.0 )std::cout << it;
-                else std::cout << "N/A";
-            }
-           std::cout << std::endl;
-        }
-
-        // only when changed (the header has been already adjusted)
-        // print each element on separate line
-        for( auto & it : metadataColumns ) {
-            log << it.second << std::endl;
-        }
-
-        // benchmark data are indented
-        const String indent = "    ";
-        for( auto & it : subElements ) {
-            if( it != 0.0 ) log << indent << it << std::endl;
-            else log << indent << "N/A" << std::endl;
-        }
-    }
-
-    void
-    writeErrorMessage( const char* msg,
-                       const int & colspan = 1 )
-    {
-        // initial indent string
-        header_indent = "!";
-        log << std::endl;
-        for( auto & it : metadataColumns ) {
-            log << header_indent << " " << it.first << std::endl;
-        }
-
-        // make sure there is a header column for the message
-        if( horizontalGroups.size() == 0 )
-            horizontalGroups.push_back( {"", 1} );
-
-        // dump stacked spanning columns
-        while( horizontalGroups.back().second <= 0 ) {
-            horizontalGroups.pop_back();
-            header_indent.pop_back();
-        }
-        for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
-            if( horizontalGroups[ i ].second > 0 ) {
-                log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
-                header_indent += "!";
-            }
-        }
-        if( horizontalGroups.size() > 0 ) {
-            horizontalGroups.back().second -= colspan;
-            header_indent.pop_back();
-        }
-
-        // only when changed (the header has been already adjusted)
-        // print each element on separate line
-        for( auto & it : metadataColumns ) {
-            log << it.second << std::endl;
-        }
-        log << msg << std::endl;
-    }
-
-    void
-    closeTable()
-    {
-        log << std::endl;
-        header_indent = body_indent = "";
-        header_changed = true;
-        horizontalGroups.clear();
-    }
-
-    bool save( std::ostream & logFile )
-    {
-        closeTable();
-        logFile << log.str();
-        if( logFile.good() ) {
-            log.str() = "";
-            return true;
-        }
-        return false;
-    }
-
-protected:
-
-    // manual double -> String conversion with fixed precision
-    static String
-    _to_string( const double & num, const int & precision = 0, bool fixed = false )
-    {
-        std::stringstream str;
-        if( fixed )
-            str << std::fixed;
-        if( precision )
-            str << std::setprecision( precision );
-        str << num;
-        return String( str.str().data() );
-    }
-
-    std::stringstream log;
-    std::string header_indent;
-    std::string body_indent;
-
-    bool verbose;
-    MetadataColumns metadataColumns;
-    bool header_changed = true;
-    std::vector< std::pair< String, int > > horizontalGroups;
-};
-
-
-class Benchmark
-    : protected Logging
-{
-public:
-    using Logging::MetadataElement;
-    using Logging::MetadataMap;
-    using Logging::MetadataColumns;
-
-    Benchmark( const int & loops = 10,
-               bool verbose = true )
-        : Logging(verbose), loops(loops)
-    { }
-
-    // TODO: ensure that this is not called in the middle of the benchmark
-    // (or just remove it completely?)
-    void
-    setLoops( const int & loops )
-    {
-        this->loops = loops;
-    }
-
-    // Marks the start of a new benchmark
-    void
-    newBenchmark( const String & title )
-    {
-        closeTable();
-        writeTitle( title );
-    }
-
-    // Marks the start of a new benchmark (with custom metadata)
-    void
-    newBenchmark( const String & title,
-                  MetadataMap metadata )
-    {
-        closeTable();
-        writeTitle( title );
-        // add loops to metadata
-        metadata["loops"] = String(loops);
-        writeMetadata( metadata );
-    }
-
-    // Sets metadata columns -- values used for all subsequent rows until
-    // the next call to this function.
-    void
-    setMetadataColumns( const MetadataColumns & metadata )
-    {
-        if( metadataColumns != metadata )
-            header_changed = true;
-        metadataColumns = metadata;
-    }
-
-    // TODO: maybe should be renamed to createVerticalGroup and ensured that vertical and horizontal groups are not used within the same "Benchmark"
-    // Sets current operation -- operations expand the table vertically
-    //  - baseTime should be reset to 0.0 for most operations, but sometimes
-    //    it is useful to override it
-    //  - Order of operations inside a "Benchmark" does not matter, rows can be
-    //    easily sorted while converting to HTML.)
-    void
-    setOperation( const String & operation,
-                  const double & datasetSize = 0.0, // in GB
-                  const double & baseTime = 0.0 )
-    {
-        if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) {
-            metadataColumns[ 0 ].second = operation;
-        }
-        else {
-            metadataColumns.insert( metadataColumns.begin(), {"operation", operation} );
-        }
-        setOperation( datasetSize, baseTime );
-        header_changed = true;
-    }
-
-    void
-    setOperation( const double & datasetSize = 0.0,
-                  const double & baseTime = 0.0 )
-    {
-        this->datasetSize = datasetSize;
-        this->baseTime = baseTime;
-    }
-
-    // Creates new horizontal groups inside a benchmark -- increases the number
-    // of columns in the "Benchmark", implies column spanning.
-    // (Useful e.g. for SpMV formats, different configurations etc.)
-    void
-    createHorizontalGroup( const String & name,
-                           const int & subcolumns )
-    {
-        if( horizontalGroups.size() == 0 ) {
-            horizontalGroups.push_back( {name, subcolumns} );
-        }
-        else {
-            auto & last = horizontalGroups.back();
-            if( last.first != name && last.second > 0 ) {
-                horizontalGroups.push_back( {name, subcolumns} );
-            }
-            else {
-                last.first = name;
-                last.second = subcolumns;
-            }
-        }
-    }
-
-    // Times a single ComputeFunction. Subsequent calls implicitly split
-    // the current "horizontal group" into sub-columns identified by
-    // "performer", which are further split into "bandwidth", "time" and
-    // "speedup" columns.
-    // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation)
-    // Also terminates the recursion of the following variadic template.
-    template< typename ResetFunction,
-              typename ComputeFunction >
-    double
-    time( ResetFunction reset,
-          const String & performer,
-          ComputeFunction & compute )
-    {
-        const double time = timeFunction( compute, reset, loops );
-        const double bandwidth = datasetSize / time;
-        const double speedup = this->baseTime / time;
-        if( this->baseTime == 0.0 )
-            this->baseTime = time;
-
-        writeTableHeader( performer, HeaderElements({"bandwidth", "time", "speedup"}) );
-        writeTableRow( performer, RowElements({ bandwidth, time, speedup }) );
-
-        return this->baseTime;
-    }
-
-    // Recursive template function to deal with multiple computations with the
-    // same reset function.
-    template< typename ResetFunction,
-              typename ComputeFunction,
-              typename... NextComputations >
-    inline double
-    time( ResetFunction reset,
-          const String & performer,
-          ComputeFunction & compute,
-          NextComputations & ... nextComputations )
-    {
-        time( reset, performer, compute );
-        time( reset, nextComputations... );
-        return this->baseTime;
-    }
-
-    // Adds an error message to the log. Should be called in places where the
-    // "time" method could not be called (e.g. due to failed allocation).
-    void
-    addErrorMessage( const char* msg,
-                     const int & numberOfComputations = 1 )
-    {
-        // each computation has 3 subcolumns
-        const int colspan = 3 * numberOfComputations;
-        writeErrorMessage( msg, colspan );
-    }
-
-    using Logging::save;
-
-protected:
-    int loops;
-    double datasetSize = 0.0;
-    double baseTime = 0.0;
-};
-
-} // namespace benchmarks
-} // namespace tnl
diff --git a/tests/benchmarks/share/tnl-run-heat-equation-benchmark b/tests/benchmarks/share/tnl-run-heat-equation-benchmark
deleted file mode 100644
index 193ef001dd891bdc0f17d09ecd4886bbd1ef39b2..0000000000000000000000000000000000000000
--- a/tests/benchmarks/share/tnl-run-heat-equation-benchmark
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/bash
-
-device="cuda"
-dofSize=128
-dimension=2;
-proportions=2
-finalTime=1.0e-1
-timeStep=1.0e-5
-
-analyticFunction="exp-bump"
-sigma=0.25
-
-tnl-grid-setup --dimensions ${dimension} \
-               --proportions-x ${proportions} \
-               --proportions-y ${proportions} \
-               --proportions-z ${proportions} \
-               --origin-x -1 \
-               --origin-y -1 \
-               --origin-z -1 \
-               --size-x ${dofSize} \
-               --size-y ${dofSize} \
-               --size-z ${dofSize} \
-               
-tnl-init --mesh mesh.tnl \
-         --test-function ${analyticFunction} \
-         --output-file init.tnl \
-         --sigma ${sigma}
-
-#valgrind --tool=memcheck \
-echo "==================================================================="
-echo "================ Heat equation solver ============================="
-echo "==================================================================="
-tnl-heat-equation --device ${device} \
-                 --time-discretisation explicit \
-                  --boundary-conditions-type dirichlet \
-                  --boundary-conditions-constant 0 \
-                  --discrete-solver euler \
-                  --snapshot-period ${finalTime} \
-                  --final-time ${finalTime} \
-                  --time-step ${timeStep} \
-                  --max-iterations 100000000 \
-                  --refresh-rate 1000 \
-                  --openmp-enabled false
-
-echo "==================================================================="
-echo "================ Heat equation benchmark =========================="
-echo "==================================================================="
-tnl-benchmark-heat-equation --device ${device} \
-                  --cuda-kernel-type templated-compact \
-                  --time-discretisation explicit \
-                  --boundary-conditions-type dirichlet \
-                  --boundary-conditions-constant 0 \
-                  --discrete-solver euler \
-                  --snapshot-period ${finalTime} \
-                  --final-time ${finalTime} \
-                  --time-step ${timeStep} \
-                  --max-iterations 100000000 \
-                  --refresh-rate 1000 \
-                  --openmp-enabled false
-             
-echo "==================================================================="
-echo "=============== Heat equation pure C benchmark ===================="
-echo "==================================================================="
-tnl-benchmark-simple-heat-equation --device ${device} \
-                                   --domain-x-size 2 \
-                                   --domain-y-size 2 \
-                                   --grid-x-size ${dofSize} \
-                                   --grid-y-size ${dofSize} \
-                                   --sigma ${sigma} \
-                                   --time-step ${timeStep} \
-                                   --final-time ${finalTime}       
-
-#tnl-benchmark-simple-heat-equation-bug --device ${device} \
-#                                   --domain-x-size 2 \
-#                                   --domain-y-size 2 \
-#                                   --grid-x-size ${dofSize} \
-#                                   --grid-y-size ${dofSize} \
-#                                   --sigma ${sigma} \
-#                                   --time-step ${timeStep} \
-#                                   --final-time ${finalTime}       
-                                   
-tnl-diff --input-files simple-heat-equation-result.tnl u-00001.tnl
-
-cat tnl-diff.log                                         
diff --git a/tests/benchmarks/spmv.h b/tests/benchmarks/spmv.h
deleted file mode 100644
index 99ab0066bc3ff0726895c4f21d632ff0c60067b4..0000000000000000000000000000000000000000
--- a/tests/benchmarks/spmv.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/***************************************************************************
-                          spmv.h  -  description
-                             -------------------
-    begin                : Dec 30, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include "benchmarks.h"
-
-#include <TNL/Containers/List.h>
-#include <TNL/DevicePointer.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
-#include <TNL/Matrices/ChunkedEllpack.h>
-
-namespace TNL
-{
-namespace benchmarks
-{
-
-// silly alias to match the number of template parameters with other formats
-template< typename Real, typename Device, typename Index >
-using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
-
-template< typename Matrix >
-int setHostTestMatrix( Matrix& matrix,
-                       const int elementsPerRow )
-{
-    const int size = matrix.getRows();
-    int elements( 0 );
-    for( int row = 0; row < size; row++ ) {
-        int col = row - elementsPerRow / 2;
-        for( int element = 0; element < elementsPerRow; element++ ) {
-            if( col + element >= 0 &&
-                col + element < size )
-            {
-                matrix.setElement( row, col + element, element + 1 );
-                elements++;
-            }
-        }
-    }
-    return elements;
-}
-
-#ifdef HAVE_CUDA
-template< typename Matrix >
-__global__ void setCudaTestMatrixKernel( Matrix* matrix,
-                                         const int elementsPerRow,
-                                         const int gridIdx )
-{
-    const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-    if( rowIdx >= matrix->getRows() )
-        return;
-    int col = rowIdx - elementsPerRow / 2;
-    for( int element = 0; element < elementsPerRow; element++ ) {
-        if( col + element >= 0 &&
-            col + element < matrix->getColumns() )
-           matrix->setElementFast( rowIdx, col + element, element + 1 );
-    }
-}
-#endif
-
-template< typename Matrix >
-void setCudaTestMatrix( Matrix& matrix,
-                        const int elementsPerRow )
-{
-#ifdef HAVE_CUDA
-    typedef typename Matrix::IndexType IndexType;
-    typedef typename Matrix::RealType RealType;
-    DevicePointer< Matrix > kernel_matrix( matrix );
-    dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
-    const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-    const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
-    for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
-        if( gridIdx == cudaGrids - 1 )
-            cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
-        setCudaTestMatrixKernel< Matrix >
-            <<< cudaGridSize, cudaBlockSize >>>
-            ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
-        TNL_CHECK_CUDA_DEVICE;
-    }
-#endif
-}
-
-
-// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
-template< typename Real,
-          template< typename, typename, typename > class Matrix,
-          template< typename, typename, typename > class Vector = Containers::Vector >
-bool
-benchmarkSpMV( Benchmark & benchmark,
-               const int & loops,
-               const int & size,
-               const int elementsPerRow = 5 )
-{
-    typedef Matrix< Real, Devices::Host, int > HostMatrix;
-    typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
-    typedef Containers::Vector< Real, Devices::Host, int > HostVector;
-    typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
-
-    HostMatrix hostMatrix;
-    DeviceMatrix deviceMatrix;
-    Containers::Vector< int, Devices::Host, int > hostRowLengths;
-    Containers::Vector< int, Devices::Cuda, int > deviceRowLengths;
-    HostVector hostVector, hostVector2;
-    CudaVector deviceVector, deviceVector2;
-
-    // create benchmark group
-    Containers::List< String > parsedType;
-    parseObjectType( HostMatrix::getType(), parsedType );
-    benchmark.createHorizontalGroup( parsedType[ 0 ], 2 );
-
-    hostRowLengths.setSize( size );
-    hostMatrix.setDimensions( size, size );
-    hostVector.setSize( size );
-    hostVector2.setSize( size );
-#ifdef HAVE_CUDA
-    deviceRowLengths.setSize( size );
-    deviceMatrix.setDimensions( size, size );
-    deviceVector.setSize( size );
-    deviceVector2.setSize( size );
-#endif
-
-    hostRowLengths.setValue( elementsPerRow );
-#ifdef HAVE_CUDA
-    deviceRowLengths.setValue( elementsPerRow );
-#endif
-
-    hostMatrix.setCompressedRowLengths( hostRowLengths );
-#ifdef HAVE_CUDA
-    deviceMatrix.setCompressedRowLengths( deviceRowLengths );
-#endif
-
-    const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow );
-    setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow );
-    const double datasetSize = ( double ) loops * elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-
-    // reset function
-    auto reset = [&]() {
-        hostVector.setValue( 1.0 );
-        hostVector2.setValue( 0.0 );
-#ifdef HAVE_CUDA
-        deviceVector.setValue( 1.0 );
-        deviceVector2.setValue( 0.0 );
-#endif
-    };
-
-    // compute functions
-    auto spmvHost = [&]() {
-        hostMatrix.vectorProduct( hostVector, hostVector2 );
-    };
-    auto spmvCuda = [&]() {
-        deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
-    };
-
-    benchmark.setOperation( datasetSize );
-    benchmark.time( reset, "CPU", spmvHost );
-#ifdef HAVE_CUDA
-    benchmark.time( reset, "GPU", spmvCuda );
-#endif
-
-    return true;
-}
-
-template< typename Real = double,
-          typename Index = int >
-bool
-benchmarkSpmvSynthetic( Benchmark & benchmark,
-                        const int & loops,
-                        const int & size,
-                        const int & elementsPerRow )
-{
-    bool result = true;
-    // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
-    result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, loops, size, elementsPerRow );
-    result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, loops, size, elementsPerRow );
-    result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, loops, size, elementsPerRow );
-    result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, loops, size, elementsPerRow );
-    return result;
-}
-
-} // namespace benchmarks
-} // namespace tnl
diff --git a/tests/benchmarks/tnl-benchmark-blas.h b/tests/benchmarks/tnl-benchmark-blas.h
deleted file mode 100644
index 8c71209534894d10a430bae874035c9d14d7bbc3..0000000000000000000000000000000000000000
--- a/tests/benchmarks/tnl-benchmark-blas.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/***************************************************************************
-                          tnl-benchmark-blas.h  -  description
-                             -------------------
-    begin                : Jan 27, 2010
-    copyright            : (C) 2010 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <TNL/Devices/Host.h>
-#include <TNL/Devices/CudaDeviceInfo.h>
-#include <TNL/Devices/SystemInfo.h>
-#include <TNL/Config/ConfigDescription.h>
-#include <TNL/Config/ParameterContainer.h>
-
-#include "array-operations.h"
-#include "vector-operations.h"
-#include "spmv.h"
-
-using namespace TNL;
-using namespace TNL::benchmarks;
-
-
-// TODO: should benchmarks check the result of the computation?
-
-
-template< typename Real >
-void
-runBlasBenchmarks( Benchmark & benchmark,
-                   Benchmark::MetadataMap metadata,
-                   const std::size_t & minSize,
-                   const std::size_t & maxSize,
-                   const double & sizeStepFactor,
-                   const unsigned & loops,
-                   const unsigned & elementsPerRow )
-{
-    const String precision = getType< Real >();
-    metadata["precision"] = precision;
-
-    // Array operations
-    benchmark.newBenchmark( String("Array operations (") + precision + ")",
-                            metadata );
-    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
-        benchmark.setMetadataColumns( Benchmark::MetadataColumns({
-           {"size", size},
-        } ));
-        benchmarkArrayOperations< Real >( benchmark, loops, size );
-    }
-
-    // Vector operations
-    benchmark.newBenchmark( String("Vector operations (") + precision + ")",
-                            metadata );
-    for( std::size_t size = minSize; size <= maxSize; size *= sizeStepFactor ) {
-        benchmark.setMetadataColumns( Benchmark::MetadataColumns({
-           {"size", size},
-        } ));
-        benchmarkVectorOperations< Real >( benchmark, loops, size );
-    }
-
-    // Sparse matrix-vector multiplication
-    benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
-                            metadata );
-    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
-        benchmark.setMetadataColumns( Benchmark::MetadataColumns({
-            {"rows", size},
-            {"columns", size},
-            {"elements per row", elementsPerRow},
-        } ));
-        benchmarkSpmvSynthetic< Real >( benchmark, loops, size, elementsPerRow );
-    }
-}
-
-void
-setupConfig( Config::ConfigDescription & config )
-{
-    config.addDelimiter( "Benchmark settings:" );
-    config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
-    config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
-    config.addEntryEnum( "append" );
-    config.addEntryEnum( "overwrite" );
-    config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
-    config.addEntryEnum( "float" );
-    config.addEntryEnum( "double" );
-    config.addEntryEnum( "all" );
-    config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark.", 100000 );
-    config.addEntry< int >( "max-size", "Minimum size of arrays/vectors used in the benchmark.", 10000000 );
-    config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
-    config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 );
-    config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 );
-    config.addEntry< int >( "verbose", "Verbose mode.", 1 );
-
-    config.addDelimiter( "Device settings:" );
-    Devices::Host::configSetup( config );
-    Devices::Cuda::configSetup( config );
-}
-
-int
-main( int argc, char* argv[] )
-{
-    Config::ParameterContainer parameters;
-    Config::ConfigDescription conf_desc;
-
-    setupConfig( conf_desc );
-
-    if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) {
-        conf_desc.printUsage( argv[ 0 ] );
-        return 1;
-    }
-
-    Devices::Host::setup( parameters );
-    Devices::Cuda::setup( parameters );
-
-    const String & logFileName = parameters.getParameter< String >( "log-file" );
-    const String & outputMode = parameters.getParameter< String >( "output-mode" );
-    const String & precision = parameters.getParameter< String >( "precision" );
-    // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
-    // which have a default value. The workaround below works for int values, but it is not possible
-    // to pass 64-bit integer values
-//    const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
-//    const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
-    const std::size_t minSize = parameters.getParameter< int >( "min-size" );
-    const std::size_t maxSize = parameters.getParameter< int >( "max-size" );
-    const unsigned sizeStepFactor = parameters.getParameter< unsigned >( "size-step-factor" );
-    const unsigned loops = parameters.getParameter< unsigned >( "loops" );
-    const unsigned elementsPerRow = parameters.getParameter< unsigned >( "elements-per-row" );
-    const unsigned verbose = parameters.getParameter< unsigned >( "verbose" );
-
-    if( sizeStepFactor <= 1 ) {
-        std::cerr << "The value of --size-step-factor must be greater than 1." << std::endl;
-        return EXIT_FAILURE;
-    }
-
-    // open log file
-    auto mode = std::ios::out;
-    if( outputMode == "append" )
-        mode |= std::ios::app;
-    std::ofstream logFile( logFileName.getString(), mode );
-
-    // init benchmark and common metadata
-    Benchmark benchmark( loops, verbose );
-
-    // prepare global metadata
-    const int cpu_id = 0;
-    Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
-    String cacheInfo = String( cacheSizes.L1data ) + ", "
-                        + String( cacheSizes.L1instruction ) + ", "
-                        + String( cacheSizes.L2 ) + ", "
-                        + String( cacheSizes.L3 );
-#ifdef HAVE_CUDA
-    const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice();
-    const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
-                              String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) );
-#endif
-    Benchmark::MetadataMap metadata {
-        { "host name", Devices::SystemInfo::getHostname() },
-        { "architecture", Devices::SystemInfo::getArchitecture() },
-        { "system", Devices::SystemInfo::getSystemName() },
-        { "system release", Devices::SystemInfo::getSystemRelease() },
-        { "start time", Devices::SystemInfo::getCurrentTime() },
-        { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) },
-        { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) },
-        { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) },
-        { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 },
-        { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
-#ifdef HAVE_CUDA
-        { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) },
-        { "GPU architecture", deviceArch },
-        { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) },
-        { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 },
-        { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 },
-        { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 },
-        { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) },
-#endif
-    };
-
-    if( precision == "all" || precision == "float" )
-        runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow );
-    if( precision == "all" || precision == "double" )
-        runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow );
-
-    if( ! benchmark.save( logFile ) ) {
-        std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-        return EXIT_FAILURE;
-    }
-
-    return EXIT_SUCCESS;
-}
diff --git a/tests/benchmarks/vector-operations.h b/tests/benchmarks/vector-operations.h
deleted file mode 100644
index cdf2443964a5bb33354de6e0ec0688018c7128a1..0000000000000000000000000000000000000000
--- a/tests/benchmarks/vector-operations.h
+++ /dev/null
@@ -1,442 +0,0 @@
-/***************************************************************************
-                          vector-operations.h  -  description
-                             -------------------
-    begin                : Dec 30, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <stdlib.h> // srand48
-
-#include "benchmarks.h"
-
-#include <TNL/Containers/Vector.h>
-
-#ifdef HAVE_CUDA
-#include "cublasWrappers.h"
-#endif
-
-namespace TNL
-{
-namespace benchmarks
-{
-
-template< typename Real = double,
-          typename Index = int >
-bool
-benchmarkVectorOperations( Benchmark & benchmark,
-                           const int & loops,
-                           const long & size )
-{
-    typedef Containers::Vector< Real, Devices::Host, Index > HostVector;
-    typedef Containers::Vector< Real, Devices::Cuda, Index > CudaVector;
-    using namespace std;
-
-    double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
-
-    HostVector hostVector, hostVector2;
-    CudaVector deviceVector, deviceVector2;
-    hostVector.setSize( size );
-    hostVector2.setSize( size );
-#ifdef HAVE_CUDA
-    deviceVector.setSize( size );
-    deviceVector2.setSize( size );
-#endif
-
-    Real resultHost, resultDevice;
-
-#ifdef HAVE_CUDA
-    cublasHandle_t cublasHandle;
-    cublasCreate( &cublasHandle );
-#endif
-
-
-    // reset functions
-    // (Make sure to always use some in benchmarks, even if it's not necessary
-    // to assure correct result - it helps to clear cache and avoid optimizations
-    // of the benchmark loop.)
-    auto reset1 = [&]() {
-        hostVector.setValue( 1.0 );
-#ifdef HAVE_CUDA
-        deviceVector.setValue( 1.0 );
-#endif
-        // A relatively harmless call to keep the compiler from realizing we
-        // don't actually do any useful work with the result of the reduciton.
-        srand48(resultHost);
-        resultHost = resultDevice = 0.0;
-    };
-    auto reset2 = [&]() {
-        hostVector2.setValue( 1.0 );
-#ifdef HAVE_CUDA
-        deviceVector2.setValue( 1.0 );
-#endif
-    };
-    auto reset12 = [&]() {
-        reset1();
-        reset2();
-    };
-
-
-    reset12();
-
-
-    auto maxHost = [&]() {
-        resultHost = hostVector.max();
-    };
-    auto maxHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionMax< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto maxCuda = [&]() {
-        resultDevice = deviceVector.max();
-    };
-    benchmark.setOperation( "max", datasetSize );
-    benchmark.time( reset1, "CPU", maxHost );
-    benchmark.time( reset1, "CPU (general)", maxHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", maxCuda );
-#endif
-
-
-    auto minHost = [&]() {
-        resultHost = hostVector.min();
-    };
-    auto minHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionMin< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto minCuda = [&]() {
-        resultDevice = deviceVector.min();
-    };
-    benchmark.setOperation( "min", datasetSize );
-    benchmark.time( reset1, "CPU", minHost );
-    benchmark.time( reset1, "CPU (general)", minHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", minCuda );
-#endif
-
-
-    auto absMaxHost = [&]() {
-        resultHost = hostVector.absMax();
-    };
-    auto absMaxHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionAbsMax< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto absMaxCuda = [&]() {
-        resultDevice = deviceVector.absMax();
-    };
-#ifdef HAVE_CUDA
-    auto absMaxCublas = [&]() {
-        int index = 0;
-        cublasIgamax( cublasHandle, size,
-                      deviceVector.getData(), 1,
-                      &index );
-        resultDevice = deviceVector.getElement( index );
-    };
-#endif
-    benchmark.setOperation( "absMax", datasetSize );
-    benchmark.time( reset1, "CPU", absMaxHost );
-    benchmark.time( reset1, "CPU (general)", absMaxHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", absMaxCuda );
-    benchmark.time( reset1, "cuBLAS", absMaxCublas );
-#endif
-
-
-    auto absMinHost = [&]() {
-        resultHost = hostVector.absMin();
-    };
-    auto absMinHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionAbsMin< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto absMinCuda = [&]() {
-        resultDevice = deviceVector.absMin();
-    };
-#ifdef HAVE_CUDA
-    auto absMinCublas = [&]() {
-        int index = 0;
-        cublasIgamin( cublasHandle, size,
-                      deviceVector.getData(), 1,
-                      &index );
-        resultDevice = deviceVector.getElement( index );
-    };
-#endif
-    benchmark.setOperation( "absMin", datasetSize );
-    benchmark.time( reset1, "CPU", absMinHost );
-    benchmark.time( reset1, "CPU (general)", absMinHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", absMinCuda );
-    benchmark.time( reset1, "cuBLAS", absMinCublas );
-#endif
-
-
-    auto sumHost = [&]() {
-        resultHost = hostVector.sum();
-    };
-    auto sumHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionSum< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto sumCuda = [&]() {
-        resultDevice = deviceVector.sum();
-    };
-    benchmark.setOperation( "sum", datasetSize );
-    benchmark.time( reset1, "CPU", sumHost );
-    benchmark.time( reset1, "CPU (general)", sumHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", sumCuda );
-#endif
-
-
-    auto l1normHost = [&]() {
-        resultHost = hostVector.lpNorm( 1.0 );
-    };
-    auto l1normHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionAbsSum< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto l1normCuda = [&]() {
-        resultDevice = deviceVector.lpNorm( 1.0 );
-    };
-#ifdef HAVE_CUDA
-    auto l1normCublas = [&]() {
-        cublasGasum( cublasHandle, size,
-                     deviceVector.getData(), 1,
-                     &resultDevice );
-    };
-#endif
-    benchmark.setOperation( "l1 norm", datasetSize );
-    benchmark.time( reset1, "CPU", l1normHost );
-    benchmark.time( reset1, "CPU (general)", l1normHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", l1normCuda );
-    benchmark.time( reset1, "cuBLAS", l1normCublas );
-#endif
-
-
-    auto l2normHost = [&]() {
-        resultHost = hostVector.lpNorm( 2.0 );
-    };
-    auto l2normHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionL2Norm< Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto l2normCuda = [&]() {
-        resultDevice = deviceVector.lpNorm( 2.0 );
-    };
-#ifdef HAVE_CUDA
-    auto l2normCublas = [&]() {
-        cublasGnrm2( cublasHandle, size,
-                     deviceVector.getData(), 1,
-                     &resultDevice );
-    };
-#endif
-    benchmark.setOperation( "l2 norm", datasetSize );
-    benchmark.time( reset1, "CPU", l2normHost );
-    benchmark.time( reset1, "CPU (general)", l2normHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", l2normCuda );
-    benchmark.time( reset1, "cuBLAS", l2normCublas );
-#endif
-
-
-    auto l3normHost = [&]() {
-        resultHost = hostVector.lpNorm( 3.0 );
-    };
-    auto l3normHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionLpNorm< Real > operation;
-        operation.setPower( 3.0 );
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              ( Real* ) 0,
-              result );
-        return result;
-    };
-    auto l3normCuda = [&]() {
-        resultDevice = deviceVector.lpNorm( 3.0 );
-    };
-    benchmark.setOperation( "l3 norm", datasetSize );
-    benchmark.time( reset1, "CPU", l3normHost );
-    benchmark.time( reset1, "CPU (general)", l3normHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", l3normCuda );
-#endif
-
-
-    auto scalarProductHost = [&]() {
-        resultHost = hostVector.scalarProduct( hostVector2 );
-    };
-    auto scalarProductHostGeneral = [&]() {
-        Real result( 0 );
-        Containers::Algorithms::ParallelReductionScalarProduct< Real, Real > operation;
-        Containers::Algorithms::Reduction< Devices::Host >::reduce(
-              operation,
-              hostVector.getSize(),
-              hostVector.getData(),
-              hostVector2.getData(),
-              result );
-        return result;
-    };
-    auto scalarProductCuda = [&]() {
-        resultDevice = deviceVector.scalarProduct( deviceVector2 );
-    };
-#ifdef HAVE_CUDA
-    auto scalarProductCublas = [&]() {
-        cublasGdot( cublasHandle, size,
-                    deviceVector.getData(), 1,
-                    deviceVector2.getData(), 1,
-                    &resultDevice );
-    };
-#endif
-    benchmark.setOperation( "scalar product", 2 * datasetSize );
-    benchmark.time( reset1, "CPU", scalarProductHost );
-    benchmark.time( reset1, "CPU (general)", scalarProductHostGeneral );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", scalarProductCuda );
-    benchmark.time( reset1, "cuBLAS", scalarProductCublas );
-#endif
-
-    /*
-   std::cout << "Benchmarking prefix-sum:" << std::endl;
-    timer.reset();
-    timer.start();
-    hostVector.computePrefixSum();
-    timer.stop();
-    timeHost = timer.getTime();
-    bandwidth = 2 * datasetSize / loops / timer.getTime();
-   std::cout << "  CPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;
-
-    timer.reset();
-    timer.start();
-    deviceVector.computePrefixSum();
-    timer.stop();
-    timeDevice = timer.getTime();
-    bandwidth = 2 * datasetSize / loops / timer.getTime();
-   std::cout << "  GPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;
-   std::cout << "  CPU/GPU speedup: " << timeHost / timeDevice << std::endl;
-
-    HostVector auxHostVector;
-    auxHostVector.setLike( deviceVector );
-    auxHostVector = deviceVector;
-    for( int i = 0; i < size; i++ )
-       if( hostVector.getElement( i ) != auxHostVector.getElement( i ) )
-       {
-          std::cerr << "Error in prefix sum at position " << i << ":  " << hostVector.getElement( i ) << " != " << auxHostVector.getElement( i ) << std::endl;
-       }
-    */
-
-
-    auto multiplyHost = [&]() {
-        hostVector *= 0.5;
-    };
-    auto multiplyCuda = [&]() {
-        deviceVector *= 0.5;
-    };
-#ifdef HAVE_CUDA
-    auto multiplyCublas = [&]() {
-        const Real alpha = 0.5;
-        cublasGscal( cublasHandle, size,
-                     &alpha,
-                     deviceVector.getData(), 1 );
-    };
-#endif
-    benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
-    benchmark.time( reset1, "CPU", multiplyHost );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", multiplyCuda );
-    benchmark.time( reset1, "cuBLAS", multiplyCublas );
-#endif
-
-
-    auto addVectorHost = [&]() {
-        hostVector.addVector( hostVector2 );
-    };
-    auto addVectorCuda = [&]() {
-        deviceVector.addVector( deviceVector2 );
-    };
-#ifdef HAVE_CUDA
-    auto addVectorCublas = [&]() {
-        const Real alpha = 1.0;
-        cublasGaxpy( cublasHandle, size,
-                     &alpha,
-                     deviceVector2.getData(), 1,
-                     deviceVector.getData(), 1 );
-    };
-#endif
-    benchmark.setOperation( "vector addition", 3 * datasetSize );
-    benchmark.time( reset1, "CPU", addVectorHost );
-#ifdef HAVE_CUDA
-    benchmark.time( reset1, "GPU", addVectorCuda );
-    benchmark.time( reset1, "cuBLAS", addVectorCublas );
-#endif
-
-
-#ifdef HAVE_CUDA
-    cublasDestroy( cublasHandle );
-#endif
-
-    return true;
-}
-
-} // namespace benchmarks
-} // namespace tnl
diff --git a/tests/mpi/GPUmeshFunctionEvaluateTest.cu b/tests/mpi/GPUmeshFunctionEvaluateTest.cu
index 9ed867636d6c307276045325ac4b1f0e4976b06a..1dcdb95207bbe4896e8a55311658edd1b7d9d440 100644
--- a/tests/mpi/GPUmeshFunctionEvaluateTest.cu
+++ b/tests/mpi/GPUmeshFunctionEvaluateTest.cu
@@ -4,11 +4,12 @@
 #if defined(HAVE_MPI) && defined(HAVE_CUDA)
 
 #include <TNL/Timer.h>
-#include <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Containers/Array.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Communicators/MpiCommunicator.h>
 #include <TNL/Communicators/NoDistrCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
@@ -56,7 +57,7 @@ int main ( int argc, char *argv[])
   typedef LinearFunction<double,DIMENSION> LinearFunctionType;
   typedef ConstFunction<double,DIMENSION> ConstFunctionType;
   
-  CommunicatorType::Init(argc,argv);
+  Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
 
   int size=10;
   int cycles=1;
@@ -105,8 +106,8 @@ int main ( int argc, char *argv[])
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
    
-   SharedPointer<MeshType> gridptr;
-   SharedPointer<MeshFunctionType> meshFunctionptr;
+   Pointers::SharedPointer<MeshType> gridptr;
+   Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
    MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;
    MeshFunctionEvaluator< MeshFunctionType, ConstFunctionType > constFunctionEvaluator;
  
@@ -118,8 +119,8 @@ int main ( int argc, char *argv[])
   
   meshFunctionptr->bind(gridptr,dof);  
   
-  SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
-  SharedPointer< ConstFunctionType, Device > constFunctionPtr; 
+  Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+  Pointers::SharedPointer< ConstFunctionType, Device > constFunctionPtr; 
    
   setup.stop();
   
@@ -165,12 +166,8 @@ int main ( int argc, char *argv[])
     cout <<"sync: "<<sync.getRealTime()<<endl;
     cout<<"all: "<<all.getRealTime()<<endl<<endl;
   }
-  
-
-  CommunicatorType::Finalize();
 
   return 0;
-
 }
 
 #else
diff --git a/tests/mpi/MeshFunctionEvaluateTest.cpp b/tests/mpi/MeshFunctionEvaluateTest.cpp
index 7cfb45185769402a4d207d7f3ad2be76ed56977c..3c06a34cb7e9713b253e8dd1f11dce1f794381a7 100644
--- a/tests/mpi/MeshFunctionEvaluateTest.cpp
+++ b/tests/mpi/MeshFunctionEvaluateTest.cpp
@@ -16,12 +16,13 @@ using namespace std;
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Communicators/MpiCommunicator.h>
 #include <TNL/Communicators/NoDistrCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/SubdomainOverlapsGetter.h>
 
 #include <TNL/Timer.h>
-#include  <TNL/SharedPointer.h>
+#include <TNL/Pointers/SharedPointer.h>
 
 //#define DIMENSION 3
 //#define OUTPUT 
@@ -62,7 +63,7 @@ int main ( int argc, char *argv[])
    typedef LinearFunction<double,DIMENSION> LinearFunctionType;
    typedef ConstFunction<double,DIMENSION> ConstFunctionType;
   
-   CommunicatorType::Init(argc,argv);
+   Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
 
    int size=9;
    int cycles=1;
@@ -111,8 +112,8 @@ int main ( int argc, char *argv[])
    SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
    distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-   SharedPointer<MeshType> gridptr;
-   SharedPointer<MeshFunctionType> meshFunctionptr;
+   Pointers::SharedPointer<MeshType> gridptr;
+   Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
    MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;
    MeshFunctionEvaluator< MeshFunctionType, ConstFunctionType > constFunctionEvaluator;
  
@@ -124,8 +125,8 @@ int main ( int argc, char *argv[])
   
    meshFunctionptr->bind(gridptr,dof);  
   
-   SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
-   SharedPointer< ConstFunctionType, Host > constFunctionPtr; 
+   Pointers::SharedPointer< LinearFunctionType, Host > linearFunctionPtr;
+   Pointers::SharedPointer< ConstFunctionType, Host > constFunctionPtr; 
    
   
   
@@ -173,7 +174,6 @@ int main ( int argc, char *argv[])
       cout <<"sync: "<<sync.getRealTime()<<endl;
       cout<<"all: "<<all.getRealTime()<<endl<<endl;
    }
-   CommunicatorType::Finalize();
 #else
   std::cout<<"MPI not Supported." << std::endl;
 #endif
diff --git a/tests/mpi/mpiio-save-load-test.cpp b/tests/mpi/mpiio-save-load-test.cpp
index 5733f355949be17bf8e001d81ee9d053e3c5f712..0fa7ee7f65d8756779d2ffb0548eadfc3d9c4b90 100644
--- a/tests/mpi/mpiio-save-load-test.cpp
+++ b/tests/mpi/mpiio-save-load-test.cpp
@@ -2,6 +2,7 @@
 #define MPIIO
 
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedGridIO.h>
@@ -38,9 +39,9 @@ int main(int argc, char **argv)
         typedef typename DistributedGridType::CoordinatesType CoordinatesType;
         typedef LinearFunction<double,DIM> LinearFunctionType;
 
-        CommunicatorType::Init(argc, argv);
+        Communicators::ScopedInitializer< CommunicatorType > mpi_init(argc, argv);
 
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
                 
         //save distributed meshfunction into files
@@ -66,8 +67,8 @@ int main(int argc, char **argv)
         SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
         distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-        SharedPointer<MeshType> gridptr;
-        SharedPointer<MeshFunctionType> meshFunctionptr;
+        Pointers::SharedPointer<MeshType> gridptr;
+        Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
         distributedGrid.setupGrid(*gridptr);
        
         DofType dofsave(gridptr->template getEntitiesCount< Cell >());
@@ -92,9 +93,6 @@ int main(int argc, char **argv)
             else
                 std::cout <<"Ok!"<<std::endl;
         }
-
-        CommunicatorType::Finalize();
-
 }
 
 #else
diff --git a/tests/mpi/mpiio-save-test.h b/tests/mpi/mpiio-save-test.h
index c51b54f79b4cdd8e044b4fdd560db9f65fef603b..a824bd5b74c917f8d65de00a57b90526e536424e 100644
--- a/tests/mpi/mpiio-save-test.h
+++ b/tests/mpi/mpiio-save-test.h
@@ -2,6 +2,7 @@
 
 #define MPIIO
 #include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Communicators/ScopedInitializer.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
 #include <TNL/Meshes/DistributedMeshes/DistributedGridIO.h>
@@ -38,9 +39,9 @@ int main(int argc, char **argv)
         typedef typename DistributedGridType::CoordinatesType CoordinatesType;
         typedef LinearFunction<double,DIM> LinearFunctionType;
 
-        CommunicatorType::Init(argc, argv);
+        Communicators::ScopedInitializer< CommunicatorType > mpi_init(argc, argv);
 
-        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        Pointers::SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
         MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
                 
         //save distributed meshfunction into files
@@ -69,8 +70,8 @@ int main(int argc, char **argv)
         SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 );
         distributedGrid.setOverlaps( lowerOverlap, upperOverlap );
 
-        SharedPointer<MeshType> gridptr;
-        SharedPointer<MeshFunctionType> meshFunctionptr;
+        Pointers::SharedPointer<MeshType> gridptr;
+        Pointers::SharedPointer<MeshFunctionType> meshFunctionptr;
         distributedGrid.setupGrid(*gridptr);
        
         DofType dof(gridptr->template getEntitiesCount< Cell >());
@@ -81,9 +82,6 @@ int main(int argc, char **argv)
         
         String fileName=String("./meshFunction.tnl");
         DistributedGridIO<MeshFunctionType,MpiIO> ::save(fileName, *meshFunctionptr );
-
-        CommunicatorType::Finalize();
-
 }
 
 #else
diff --git a/tests/unit-tests/functions/tnlOperatorFunctionTest.h b/tests/unit-tests/functions/tnlOperatorFunctionTest.h
index 62cb017f83966c1e7c82a1347ca1f202c9fd245a..7495385cff30f9721c843775b3426d2127dc0d50 100644
--- a/tests/unit-tests/functions/tnlOperatorFunctionTest.h
+++ b/tests/unit-tests/functions/tnlOperatorFunctionTest.h
@@ -43,7 +43,7 @@ class OperatorFunctionTest
    typedef typename MeshType::PointType PointType;
    typedef Functions::Analytic::ExpBump< MeshType::getMeshDimension(), RealType > TestFunctionType;
    typedef Functions::MeshFunction< MeshType, MeshType::getMeshDimension() > MeshFunctionType;
-   typedef SharedPointer< MeshType > MeshPointer;
+   typedef Pointers::SharedPointer<  MeshType > MeshPointer;
 
    OperatorFunctionTest(){};
 
@@ -90,7 +90,7 @@ class OperatorFunctionTest
  
    void testWithBoundaryConditions()
    {
-      SharedPointer< MeshType > mesh;
+      Pointers::SharedPointer<  MeshType > mesh;
       typedef Operators::DirichletBoundaryConditions< MeshType > BoundaryConditionsType;
       typedef Functions::OperatorFunction< Operator, MeshFunctionType, BoundaryConditionsType, EvaluateOnFly > OperatorFunctionType;
       mesh->setDimensions( CoordinatesType( 25 ) );
diff --git a/tests/unit-tests/matrices/tnlChunkedEllpackMatrixTester.h b/tests/unit-tests/matrices/tnlChunkedEllpackMatrixTester.h
index a98a85ab103f1354481a1d67f926f7f4b2c650db..56785a238ca634467f309c8bb6c068449ec5adda 100644
--- a/tests/unit-tests/matrices/tnlChunkedEllpackMatrixTester.h
+++ b/tests/unit-tests/matrices/tnlChunkedEllpackMatrixTester.h
@@ -272,7 +272,7 @@ class ChunkedEllpackTester : public CppUnit :: TestCase
    }
 };
 #else /* HAVE_CPPUNIT */
-template< typename ElementType, typename Device, typename IndexType >
+template< typename ValueType, typename Device, typename IndexType >
 class ChunkedEllpackTester{};
 #endif /* HAVE_CPPUNIT */
 
diff --git a/tests/unit-tests/operators/tnlApproximationError.h b/tests/unit-tests/operators/tnlApproximationError.h
index fcbf8404275f9be76d78fb4f3b9ef574dce3e46f..dee907698cd3d24e92f3bd9c4004e02baf2e7995 100644
--- a/tests/unit-tests/operators/tnlApproximationError.h
+++ b/tests/unit-tests/operators/tnlApproximationError.h
@@ -34,7 +34,7 @@ class tnlApproximationError
       typedef typename MeshType::DeviceType DeviceType;
       typedef typename MeshType::IndexType IndexType;
       typedef typename MeshType::PointType PointType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       typedef Functions::Analytic::Constant< MeshType::meshDimension, RealType > ConstantType;
       typedef Operators::DirichletBoundaryConditions< MeshType, Function  > BoundaryConditionsType;
 
diff --git a/tests/unit-tests/operators/tnlOperatorCompositionTest.h b/tests/unit-tests/operators/tnlOperatorCompositionTest.h
index 0a380191e4ac0c5806eedbc8e60a4ad846c83960..6a8631621fa12b238268e6a308f24fab734f3b9a 100644
--- a/tests/unit-tests/operators/tnlOperatorCompositionTest.h
+++ b/tests/unit-tests/operators/tnlOperatorCompositionTest.h
@@ -65,7 +65,7 @@ class OperatorCompositionTest
  
    void test()
    {      
-      SharedPointer< MeshType > mesh;
+      Pointers::SharedPointer<  MeshType > mesh;
       mesh->setDimensions( CoordinatesType( 25 ) );
       mesh->setDomain( PointType( -1.0 ), PointType( 2.0 ) );
       TestFunctionType testFunction;
diff --git a/tests/unit-tests/operators/tnlPDEOperatorEocTest.h b/tests/unit-tests/operators/tnlPDEOperatorEocTest.h
index 2d5aefcc38f0bd201771d93f55f2e9bfa17e8526..49399aa2d1a81658a379baf32da2599bb34037d0 100644
--- a/tests/unit-tests/operators/tnlPDEOperatorEocTest.h
+++ b/tests/unit-tests/operators/tnlPDEOperatorEocTest.h
@@ -35,7 +35,7 @@ class tnlPDEOperatorEocTest
       typedef TestFunction TestFunctionType;
       typedef ExactOperator ExactOperatorType;
       typedef typename ApproximateOperator::MeshType MeshType;
-      typedef SharedPointer< MeshType > MeshPointer;
+      typedef Pointers::SharedPointer<  MeshType > MeshPointer;
       typedef typename ApproximateOperator::RealType RealType;
       typedef typename ApproximateOperator::IndexType IndexType;